Ahmedik95316 committed on
Commit
dce7bfe
·
1 Parent(s): b8b6cad

Update app/fastapi_server.py

Browse files

Reverted to the previous working version

Files changed (1) hide show
  1. app/fastapi_server.py +88 -418
app/fastapi_server.py CHANGED
@@ -1,5 +1,4 @@
1
- # Add missing import
2
- import os
3
  import time
4
  import joblib
5
  import logging
@@ -24,59 +23,12 @@ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
24
  from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status
25
 
26
 
27
- # =============================================================================
28
- # CENTRALIZED PATH CONFIGURATION - FIXES THE CRITICAL BUG
29
- # =============================================================================
30
- class PathConfig:
31
- """Centralized path management to ensure consistency across all components"""
32
-
33
- # Environment detection
34
- if os.getenv("HF_SPACES_BUILD") == "1" or os.getenv("SPACE_ID"):
35
- BASE_DIR = Path("/app/persistent")
36
- ENVIRONMENT = "huggingface_spaces"
37
- else:
38
- BASE_DIR = Path("/tmp")
39
- ENVIRONMENT = "local"
40
-
41
- # Base directories
42
- DATA_DIR = BASE_DIR / "data"
43
- MODEL_DIR = BASE_DIR / "model"
44
- LOGS_DIR = BASE_DIR / "logs"
45
-
46
- # Model files - CONSISTENT PATHS (matches train.py)
47
- MODEL_FILE = MODEL_DIR / "model.pkl" # ✅ /tmp/model/model.pkl
48
- VECTORIZER_FILE = MODEL_DIR / "vectorizer.pkl" # ✅ /tmp/model/vectorizer.pkl
49
- PIPELINE_FILE = MODEL_DIR / "pipeline.pkl" # ✅ /tmp/model/pipeline.pkl
50
- METADATA_FILE = BASE_DIR / "metadata.json" # ✅ /tmp/metadata.json
51
-
52
- # Log files
53
- SERVER_LOG = LOGS_DIR / "fastapi_server.log"
54
- PREDICTION_LOG = LOGS_DIR / "prediction_log.json"
55
-
56
- @classmethod
57
- def ensure_directories(cls):
58
- """Create all required directories with proper permissions"""
59
- for attr_name in dir(cls):
60
- attr = getattr(cls, attr_name)
61
- if isinstance(attr, Path) and attr_name.endswith('_DIR'):
62
- attr.mkdir(parents=True, exist_ok=True, mode=0o755)
63
-
64
- # Ensure log directory exists
65
- cls.LOGS_DIR.mkdir(parents=True, exist_ok=True, mode=0o755)
66
-
67
-
68
- # Initialize directories at startup
69
- PathConfig.ensure_directories()
70
-
71
-
72
- # =============================================================================
73
- # ENHANCED LOGGING CONFIGURATION
74
- # =============================================================================
75
  logging.basicConfig(
76
  level=logging.INFO,
77
- format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
78
  handlers=[
79
- logging.FileHandler(PathConfig.SERVER_LOG),
80
  logging.StreamHandler()
81
  ]
82
  )
@@ -89,57 +41,8 @@ security = HTTPBearer(auto_error=False)
89
  rate_limit_storage = defaultdict(list)
90
 
91
 
92
- # =============================================================================
93
- # CIRCUIT BREAKER FOR RESILIENCE
94
- # =============================================================================
95
- class CircuitBreaker:
96
- """Circuit breaker pattern for model operations"""
97
-
98
- def __init__(self, failure_threshold: int = 5, recovery_timeout: int = 60):
99
- self.failure_threshold = failure_threshold
100
- self.recovery_timeout = recovery_timeout
101
- self.failure_count = 0
102
- self.last_failure_time = None
103
- self.state = 'CLOSED' # CLOSED, OPEN, HALF_OPEN
104
-
105
- def call(self, func, *args, **kwargs):
106
- """Execute function with circuit breaker protection"""
107
- if self.state == 'OPEN':
108
- if time.time() - self.last_failure_time > self.recovery_timeout:
109
- self.state = 'HALF_OPEN'
110
- logger.info("Circuit breaker transitioning to HALF_OPEN")
111
- else:
112
- raise Exception("Circuit breaker is OPEN - service unavailable")
113
-
114
- try:
115
- result = func(*args, **kwargs)
116
- self.on_success()
117
- return result
118
- except Exception as e:
119
- self.on_failure()
120
- raise
121
-
122
- def on_success(self):
123
- """Reset circuit breaker on successful operation"""
124
- self.failure_count = 0
125
- if self.state == 'HALF_OPEN':
126
- self.state = 'CLOSED'
127
- logger.info("Circuit breaker reset to CLOSED")
128
-
129
- def on_failure(self):
130
- """Handle failure and potentially open circuit"""
131
- self.failure_count += 1
132
- self.last_failure_time = time.time()
133
- if self.failure_count >= self.failure_threshold:
134
- self.state = 'OPEN'
135
- logger.error(f"Circuit breaker OPENED after {self.failure_count} failures")
136
-
137
-
138
- # =============================================================================
139
- # ENHANCED MODEL MANAGER WITH FIXED PATHS
140
- # =============================================================================
141
  class ModelManager:
142
- """Enhanced model manager with circuit breaker and proper path handling"""
143
 
144
  def __init__(self):
145
  self.model = None
@@ -148,139 +51,68 @@ class ModelManager:
148
  self.model_metadata = {}
149
  self.last_health_check = None
150
  self.health_status = "unknown"
151
- self.circuit_breaker = CircuitBreaker()
152
  self.load_model()
153
 
154
  def load_model(self):
155
- """Load model with FIXED PATHS and comprehensive error handling"""
156
  try:
157
- logger.info("Loading ML model with corrected paths...")
158
-
159
- # FIXED: Use correct paths that match train.py
160
- pipeline_path = PathConfig.PIPELINE_FILE # ✅ /tmp/model/pipeline.pkl
161
- model_path = PathConfig.MODEL_FILE # ✅ /tmp/model/model.pkl
162
- vectorizer_path = PathConfig.VECTORIZER_FILE # ✅ /tmp/model/vectorizer.pkl
163
- metadata_path = PathConfig.METADATA_FILE # ✅ /tmp/metadata.json
164
-
165
- logger.info(f"Looking for pipeline at: {pipeline_path}")
166
- logger.info(f"Looking for model at: {model_path}")
167
- logger.info(f"Looking for vectorizer at: {vectorizer_path}")
168
-
169
- # Try to load pipeline first (preferred method)
170
  if pipeline_path.exists():
171
- logger.info("✅ Found pipeline file, loading...")
172
  self.pipeline = joblib.load(pipeline_path)
173
-
174
- # Extract components from pipeline
175
- if hasattr(self.pipeline, 'named_steps'):
176
- self.model = self.pipeline.named_steps.get('model')
177
- self.vectorizer = self.pipeline.named_steps.get('vectorize')
178
- logger.info("✅ Extracted model and vectorizer from pipeline")
179
- else:
180
- # Pipeline might be the complete model
181
- logger.info("✅ Pipeline loaded as complete model")
182
-
183
- logger.info("✅ Loaded model pipeline successfully")
184
-
185
- elif model_path.exists() and vectorizer_path.exists():
186
- # Fallback to individual components
187
- logger.info("📦 Loading individual model components...")
188
- self.model = joblib.load(model_path)
189
- self.vectorizer = joblib.load(vectorizer_path)
190
- logger.info("✅ Loaded individual model components successfully")
191
-
192
  else:
193
- # Detailed error message for debugging
194
- available_files = []
195
- for path in [pipeline_path, model_path, vectorizer_path]:
196
- if path.exists():
197
- available_files.append(str(path))
198
-
199
- error_msg = f"No model files found at expected paths. Available files: {available_files}"
200
- logger.error(error_msg)
201
- raise FileNotFoundError(error_msg)
202
-
203
- # Load metadata if available
 
 
204
  if metadata_path.exists():
205
- try:
206
- with open(metadata_path, 'r') as f:
207
- self.model_metadata = json.load(f)
208
- logger.info(f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
209
- except Exception as e:
210
- logger.warning(f"Could not load metadata: {e}")
211
- self.model_metadata = {"model_version": "unknown"}
212
- else:
213
- logger.warning(f"Metadata file not found at {metadata_path}")
214
- self.model_metadata = {"model_version": "unknown"}
215
 
216
- # Verify model is functional
217
- self._verify_model_functionality()
218
-
219
  self.health_status = "healthy"
220
  self.last_health_check = datetime.now()
221
- logger.info("🎉 Model loaded successfully and is functional!")
222
 
223
  except Exception as e:
224
- logger.error(f"Failed to load model: {str(e)}")
225
- logger.error(f"❌ Exception details: {traceback.format_exc()}")
226
  self.health_status = "unhealthy"
227
  self.model = None
228
  self.vectorizer = None
229
  self.pipeline = None
230
-
231
- def _verify_model_functionality(self):
232
- """Verify that the loaded model can make predictions"""
233
- test_text = "This is a test article for verification purposes."
234
-
235
- try:
236
- if self.pipeline:
237
- # Test pipeline prediction
238
- prediction = self.pipeline.predict([test_text])
239
- probabilities = self.pipeline.predict_proba([test_text])
240
- logger.info("✅ Pipeline prediction test successful")
241
- elif self.model and self.vectorizer:
242
- # Test individual components
243
- X = self.vectorizer.transform([test_text])
244
- prediction = self.model.predict(X)
245
- probabilities = self.model.predict_proba(X)
246
- logger.info("✅ Individual components prediction test successful")
247
- else:
248
- raise ValueError("No functional model components available")
249
-
250
- except Exception as e:
251
- logger.error(f"❌ Model functionality verification failed: {e}")
252
- raise
253
 
254
  def predict(self, text: str) -> tuple[str, float]:
255
- """Make prediction with circuit breaker protection"""
256
- return self.circuit_breaker.call(self._predict_internal, text)
257
-
258
- def _predict_internal(self, text: str) -> tuple[str, float]:
259
- """Internal prediction method"""
260
  try:
261
  if self.pipeline:
262
  # Use pipeline for prediction
263
  prediction = self.pipeline.predict([text])[0]
264
  probabilities = self.pipeline.predict_proba([text])[0]
265
- logger.debug("Used pipeline for prediction")
266
-
267
  elif self.model and self.vectorizer:
268
  # Use individual components
269
  X = self.vectorizer.transform([text])
270
  prediction = self.model.predict(X)[0]
271
  probabilities = self.model.predict_proba(X)[0]
272
- logger.debug("Used individual components for prediction")
273
-
274
  else:
275
  raise ValueError("No model available for prediction")
276
 
277
- # Get confidence score (handle both binary and probability outputs)
278
- if isinstance(probabilities, (list, np.ndarray)) and len(probabilities) > 1:
279
- # Multi-class probabilities
280
- confidence = float(np.max(probabilities))
281
- else:
282
- # Binary classification
283
- confidence = float(probabilities[1] if len(probabilities) > 1 else probabilities[0])
284
 
285
  # Convert prediction to readable format
286
  label = "Fake" if prediction == 1 else "Real"
@@ -288,14 +120,14 @@ class ModelManager:
288
  return label, confidence
289
 
290
  except Exception as e:
291
- logger.error(f"Prediction failed: {str(e)}")
292
  raise HTTPException(
293
  status_code=500,
294
  detail=f"Prediction failed: {str(e)}"
295
  )
296
 
297
  def health_check(self) -> Dict[str, Any]:
298
- """Comprehensive health check with circuit breaker status"""
299
  try:
300
  # Test prediction with sample text
301
  test_text = "This is a test article for health check purposes."
@@ -310,15 +142,7 @@ class ModelManager:
310
  "model_available": self.model is not None,
311
  "vectorizer_available": self.vectorizer is not None,
312
  "pipeline_available": self.pipeline is not None,
313
- "circuit_breaker_state": self.circuit_breaker.state,
314
- "circuit_breaker_failures": self.circuit_breaker.failure_count,
315
- "test_prediction": {"label": label, "confidence": confidence},
316
- "model_paths": {
317
- "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
318
- "model_exists": PathConfig.MODEL_FILE.exists(),
319
- "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
320
- "metadata_exists": PathConfig.METADATA_FILE.exists()
321
- }
322
  }
323
 
324
  except Exception as e:
@@ -331,15 +155,7 @@ class ModelManager:
331
  "error": str(e),
332
  "model_available": self.model is not None,
333
  "vectorizer_available": self.vectorizer is not None,
334
- "pipeline_available": self.pipeline is not None,
335
- "circuit_breaker_state": self.circuit_breaker.state,
336
- "circuit_breaker_failures": self.circuit_breaker.failure_count,
337
- "model_paths": {
338
- "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
339
- "model_exists": PathConfig.MODEL_FILE.exists(),
340
- "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
341
- "metadata_exists": PathConfig.METADATA_FILE.exists()
342
- }
343
  }
344
 
345
 
@@ -347,17 +163,11 @@ class ModelManager:
347
  model_manager = ModelManager()
348
 
349
 
350
- # =============================================================================
351
- # FASTAPI APPLICATION SETUP
352
- # =============================================================================
353
  @asynccontextmanager
354
  async def lifespan(app: FastAPI):
355
- """Manage application lifespan with enhanced startup"""
356
- logger.info("🚀 Starting FastAPI application...")
357
 
358
- # Ensure directories exist
359
- PathConfig.ensure_directories()
360
-
361
  # Startup tasks
362
  model_manager.load_model()
363
 
@@ -367,14 +177,13 @@ async def lifespan(app: FastAPI):
367
  yield
368
 
369
  # Shutdown tasks
370
- logger.info("🛑 Shutting down FastAPI application...")
371
-
372
 
373
  # Create FastAPI app
374
  app = FastAPI(
375
  title="Fake News Detection API",
376
- description="Production-ready API for fake news detection with fixed path management and comprehensive monitoring",
377
- version="2.1.0",
378
  docs_url="/docs",
379
  redoc_url="/redoc",
380
  lifespan=lifespan
@@ -394,10 +203,9 @@ app.add_middleware(
394
  allowed_hosts=["*"] # Configure appropriately for production
395
  )
396
 
 
 
397
 
398
- # =============================================================================
399
- # REQUEST/RESPONSE MODELS (UNCHANGED)
400
- # =============================================================================
401
  class PredictionRequest(BaseModel):
402
  text: str = Field(..., min_length=1, max_length=10000,
403
  description="Text to analyze for fake news detection")
@@ -464,10 +272,9 @@ class HealthResponse(BaseModel):
464
  system_health: Dict[str, Any]
465
  api_health: Dict[str, Any]
466
 
 
 
467
 
468
- # =============================================================================
469
- # MIDDLEWARE AND RATE LIMITING (UNCHANGED)
470
- # =============================================================================
471
  async def rate_limit_check(request: Request):
472
  """Check rate limits"""
473
  client_ip = request.client.host
@@ -489,6 +296,8 @@ async def rate_limit_check(request: Request):
489
  # Add current request
490
  rate_limit_storage[client_ip].append(current_time)
491
 
 
 
492
 
493
  @app.middleware("http")
494
  async def log_requests(request: Request, call_next):
@@ -512,10 +321,9 @@ async def log_requests(request: Request, call_next):
512
 
513
  return response
514
 
 
 
515
 
516
- # =============================================================================
517
- # ERROR HANDLERS (UNCHANGED)
518
- # =============================================================================
519
  @app.exception_handler(HTTPException)
520
  async def http_exception_handler(request: Request, exc: HTTPException):
521
  """Handle HTTP exceptions"""
@@ -552,42 +360,35 @@ async def general_exception_handler(request: Request, exc: Exception):
552
  content=error_data
553
  )
554
 
 
 
555
 
556
- # =============================================================================
557
- # BACKGROUND TASKS
558
- # =============================================================================
559
  async def periodic_health_check():
560
- """Enhanced periodic health check"""
561
  while True:
562
  try:
563
  await asyncio.sleep(300) # Check every 5 minutes
564
  health_status = model_manager.health_check()
565
 
566
  if health_status["status"] == "unhealthy":
567
- logger.warning("⚠️ Model health check failed, attempting to reload...")
 
568
  model_manager.load_model()
569
 
570
  except Exception as e:
571
- logger.error(f"Periodic health check failed: {e}")
 
 
572
 
573
 
574
- # =============================================================================
575
- # API ROUTES (ENHANCED WITH BETTER ERROR HANDLING)
576
- # =============================================================================
577
  @app.get("/", response_model=Dict[str, str])
578
  async def root():
579
- """Root endpoint with path diagnostics"""
580
  return {
581
- "message": "Fake News Detection API - FIXED VERSION",
582
- "version": "2.1.0",
583
- "status": "Path management issues resolved",
584
  "documentation": "/docs",
585
- "health_check": "/health",
586
- "model_path_info": {
587
- "pipeline_path": str(PathConfig.PIPELINE_FILE),
588
- "model_path": str(PathConfig.MODEL_FILE),
589
- "vectorizer_path": str(PathConfig.VECTORIZER_FILE)
590
- }
591
  }
592
 
593
 
@@ -645,7 +446,7 @@ async def predict(
645
  except HTTPException:
646
  raise
647
  except Exception as e:
648
- logger.error(f"Prediction failed: {e}")
649
  raise HTTPException(
650
  status_code=500,
651
  detail=f"Prediction failed: {str(e)}"
@@ -693,7 +494,7 @@ async def predict_batch(
693
  predictions.append(prediction)
694
 
695
  except Exception as e:
696
- logger.error(f"Batch prediction failed for text: {e}")
697
  # Continue with other texts
698
  continue
699
 
@@ -725,7 +526,7 @@ async def predict_batch(
725
  except HTTPException:
726
  raise
727
  except Exception as e:
728
- logger.error(f"Batch prediction failed: {e}")
729
  raise HTTPException(
730
  status_code=500,
731
  detail=f"Batch prediction failed: {str(e)}"
@@ -735,7 +536,7 @@ async def predict_batch(
735
  @app.get("/health", response_model=HealthResponse)
736
  async def health_check():
737
  """
738
- Enhanced health check endpoint with path diagnostics
739
 
740
  - **returns**: Detailed health status of the API and model
741
  """
@@ -770,7 +571,7 @@ async def health_check():
770
  )
771
 
772
  except Exception as e:
773
- logger.error(f"Health check failed: {e}")
774
  return HealthResponse(
775
  status="unhealthy",
776
  timestamp=datetime.now().isoformat(),
@@ -783,7 +584,7 @@ async def health_check():
783
  @app.get("/metrics")
784
  async def get_metrics():
785
  """
786
- Get API metrics with enhanced path information
787
 
788
  - **returns**: Usage statistics and performance metrics
789
  """
@@ -799,23 +600,13 @@ async def get_metrics():
799
  "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
800
  "model_health": model_manager.health_status,
801
  "last_health_check": model_manager.last_health_check.isoformat() if model_manager.last_health_check else None,
802
- "circuit_breaker": {
803
- "state": model_manager.circuit_breaker.state,
804
- "failure_count": model_manager.circuit_breaker.failure_count
805
- },
806
- "path_status": {
807
- "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
808
- "model_exists": PathConfig.MODEL_FILE.exists(),
809
- "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
810
- "metadata_exists": PathConfig.METADATA_FILE.exists()
811
- },
812
  "timestamp": datetime.now().isoformat()
813
  }
814
 
815
  return metrics
816
 
817
  except Exception as e:
818
- logger.error(f"Metrics retrieval failed: {e}")
819
  raise HTTPException(
820
  status_code=500,
821
  detail=f"Metrics retrieval failed: {str(e)}"
@@ -825,12 +616,12 @@ async def get_metrics():
825
  @app.post("/model/reload")
826
  async def reload_model():
827
  """
828
- Reload the ML model with enhanced feedback
829
 
830
  - **returns**: Status of model reload operation
831
  """
832
  try:
833
- logger.info("🔄 Manual model reload requested")
834
  model_manager.load_model()
835
 
836
  return {
@@ -838,29 +629,21 @@ async def reload_model():
838
  "message": "Model reloaded successfully",
839
  "model_health": model_manager.health_status,
840
  "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
841
- "circuit_breaker_reset": model_manager.circuit_breaker.state,
842
- "path_verification": {
843
- "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
844
- "model_exists": PathConfig.MODEL_FILE.exists(),
845
- "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
846
- "metadata_exists": PathConfig.METADATA_FILE.exists()
847
- },
848
  "timestamp": datetime.now().isoformat()
849
  }
850
 
851
  except Exception as e:
852
- logger.error(f"Model reload failed: {e}")
853
  raise HTTPException(
854
  status_code=500,
855
  detail=f"Model reload failed: {str(e)}"
856
  )
857
 
 
 
858
 
859
- # =============================================================================
860
- # BACKGROUND TASK FUNCTIONS (ENHANCED)
861
- # =============================================================================
862
  async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
863
- """Log prediction details to structured log file"""
864
  try:
865
  log_entry = {
866
  "timestamp": datetime.now().isoformat(),
@@ -872,8 +655,8 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
872
  "text_hash": hashlib.md5(text.encode()).hexdigest()
873
  }
874
 
875
- # Save to log file with proper path
876
- log_file = PathConfig.PREDICTION_LOG
877
 
878
  # Load existing logs
879
  logs = []
@@ -897,7 +680,7 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_i
897
  await f.write(json.dumps(logs, indent=2))
898
 
899
  except Exception as e:
900
- logger.error(f"Failed to log prediction: {e}")
901
 
902
 
903
  async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
@@ -913,114 +696,22 @@ async def log_batch_prediction(total_texts: int, successful_predictions: int, cl
913
  "success_rate": successful_predictions / total_texts if total_texts > 0 else 0
914
  }
915
 
916
- logger.info(f"📊 Batch prediction logged: {json.dumps(log_entry)}")
917
 
918
  except Exception as e:
919
- logger.error(f"Failed to log batch prediction: {e}")
920
 
921
-
922
- # =============================================================================
923
- # ENHANCED PATH DIAGNOSTICS ENDPOINT
924
- # =============================================================================
925
- @app.get("/debug/paths")
926
- async def debug_paths():
927
- """
928
- Debug endpoint to check all file paths and their existence
929
-
930
- - **returns**: Detailed path information for troubleshooting
931
- """
932
- try:
933
- path_info = {
934
- "base_directories": {
935
- "BASE_DIR": {
936
- "path": str(PathConfig.BASE_DIR),
937
- "exists": PathConfig.BASE_DIR.exists(),
938
- "is_dir": PathConfig.BASE_DIR.is_dir() if PathConfig.BASE_DIR.exists() else False
939
- },
940
- "MODEL_DIR": {
941
- "path": str(PathConfig.MODEL_DIR),
942
- "exists": PathConfig.MODEL_DIR.exists(),
943
- "is_dir": PathConfig.MODEL_DIR.is_dir() if PathConfig.MODEL_DIR.exists() else False
944
- },
945
- "DATA_DIR": {
946
- "path": str(PathConfig.DATA_DIR),
947
- "exists": PathConfig.DATA_DIR.exists(),
948
- "is_dir": PathConfig.DATA_DIR.is_dir() if PathConfig.DATA_DIR.exists() else False
949
- },
950
- "LOGS_DIR": {
951
- "path": str(PathConfig.LOGS_DIR),
952
- "exists": PathConfig.LOGS_DIR.exists(),
953
- "is_dir": PathConfig.LOGS_DIR.is_dir() if PathConfig.LOGS_DIR.exists() else False
954
- }
955
- },
956
- "model_files": {
957
- "PIPELINE_FILE": {
958
- "path": str(PathConfig.PIPELINE_FILE),
959
- "exists": PathConfig.PIPELINE_FILE.exists(),
960
- "size": PathConfig.PIPELINE_FILE.stat().st_size if PathConfig.PIPELINE_FILE.exists() else None
961
- },
962
- "MODEL_FILE": {
963
- "path": str(PathConfig.MODEL_FILE),
964
- "exists": PathConfig.MODEL_FILE.exists(),
965
- "size": PathConfig.MODEL_FILE.stat().st_size if PathConfig.MODEL_FILE.exists() else None
966
- },
967
- "VECTORIZER_FILE": {
968
- "path": str(PathConfig.VECTORIZER_FILE),
969
- "exists": PathConfig.VECTORIZER_FILE.exists(),
970
- "size": PathConfig.VECTORIZER_FILE.stat().st_size if PathConfig.VECTORIZER_FILE.exists() else None
971
- },
972
- "METADATA_FILE": {
973
- "path": str(PathConfig.METADATA_FILE),
974
- "exists": PathConfig.METADATA_FILE.exists(),
975
- "size": PathConfig.METADATA_FILE.stat().st_size if PathConfig.METADATA_FILE.exists() else None
976
- }
977
- },
978
- "model_manager_status": {
979
- "model_loaded": model_manager.model is not None,
980
- "vectorizer_loaded": model_manager.vectorizer is not None,
981
- "pipeline_loaded": model_manager.pipeline is not None,
982
- "health_status": model_manager.health_status,
983
- "circuit_breaker_state": model_manager.circuit_breaker.state
984
- },
985
- "directory_contents": {
986
- "base_dir_files": [str(f) for f in PathConfig.BASE_DIR.iterdir()] if PathConfig.BASE_DIR.exists() else [],
987
- "model_dir_files": [str(f) for f in PathConfig.MODEL_DIR.iterdir()] if PathConfig.MODEL_DIR.exists() else []
988
- }
989
- }
990
-
991
- return path_info
992
-
993
- except Exception as e:
994
- logger.error(f"❌ Path debug failed: {e}")
995
- return {
996
- "error": str(e),
997
- "timestamp": datetime.now().isoformat()
998
- }
999
 
1000
 
1001
- # =============================================================================
1002
- # CUSTOM OPENAPI CONFIGURATION
1003
- # =============================================================================
1004
  def custom_openapi():
1005
  if app.openapi_schema:
1006
  return app.openapi_schema
1007
 
1008
  openapi_schema = get_openapi(
1009
- title="Fake News Detection API - FIXED VERSION",
1010
- version="2.1.0",
1011
- description="""
1012
- Production-ready API for fake news detection with FIXED path management.
1013
-
1014
- **Key Fixes:**
1015
- - ✅ Centralized path configuration
1016
- - ✅ Circuit breaker pattern for resilience
1017
- - ✅ Enhanced error handling and logging
1018
- - ✅ Path diagnostics endpoints for debugging
1019
-
1020
- **Critical Bug Fix:**
1021
- The previous version had inconsistent paths between FastAPI server and training components.
1022
- This version uses consistent paths that match the training pipeline.
1023
- """,
1024
  routes=app.routes,
1025
  )
1026
 
@@ -1039,33 +730,12 @@ def custom_openapi():
1039
 
1040
  app.openapi = custom_openapi
1041
 
1042
-
1043
- # =============================================================================
1044
- # APPLICATION STARTUP
1045
- # =============================================================================
1046
  if __name__ == "__main__":
1047
- # Final path verification before starting
1048
- logger.info("🔍 Performing final path verification...")
1049
-
1050
- PathConfig.ensure_directories()
1051
-
1052
- logger.info(f"📁 Model directory: {PathConfig.MODEL_DIR}")
1053
- logger.info(f"📁 Pipeline file path: {PathConfig.PIPELINE_FILE}")
1054
- logger.info(f"📁 Model file path: {PathConfig.MODEL_FILE}")
1055
- logger.info(f"📁 Vectorizer file path: {PathConfig.VECTORIZER_FILE}")
1056
-
1057
- if PathConfig.MODEL_DIR.exists():
1058
- logger.info(f"✅ Model directory exists with {len(list(PathConfig.MODEL_DIR.iterdir()))} files")
1059
- for file in PathConfig.MODEL_DIR.iterdir():
1060
- logger.info(f" 📄 {file.name} ({file.stat().st_size} bytes)")
1061
- else:
1062
- logger.warning(f"⚠️ Model directory does not exist: {PathConfig.MODEL_DIR}")
1063
-
1064
  uvicorn.run(
1065
  "fastapi_server:app",
1066
- host="0.0.0.0", # Changed to 0.0.0.0 for containerized environments
1067
  port=8000,
1068
  log_level="info",
1069
  reload=False,
1070
  access_log=True
1071
- )
 
1
+ import json
 
2
  import time
3
  import joblib
4
  import logging
 
23
  from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status
24
 
25
 
26
+ # Configure logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  logging.basicConfig(
28
  level=logging.INFO,
29
+ format='%(asctime)s - %(levelname)s - %(message)s',
30
  handlers=[
31
+ logging.FileHandler('/tmp/fastapi_server.log'),
32
  logging.StreamHandler()
33
  ]
34
  )
 
41
  rate_limit_storage = defaultdict(list)
42
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  class ModelManager:
45
+ """Manages model loading and health checks"""
46
 
47
  def __init__(self):
48
  self.model = None
 
51
  self.model_metadata = {}
52
  self.last_health_check = None
53
  self.health_status = "unknown"
 
54
  self.load_model()
55
 
56
  def load_model(self):
57
+ """Load model with comprehensive error handling"""
58
  try:
59
+ logger.info("Loading ML model...")
60
+
61
+ # Try to load pipeline first (preferred)
62
+ # pipeline_path = Path("/tmp/model/pipeline.pkl")
63
+ pipeline_path = Path("/tmp/pipeline.pkl")
 
 
 
 
 
 
 
 
64
  if pipeline_path.exists():
 
65
  self.pipeline = joblib.load(pipeline_path)
66
+ self.model = self.pipeline.named_steps.get('model')
67
+ self.vectorizer = self.pipeline.named_steps.get('vectorize')
68
+ logger.info("Loaded model pipeline successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  else:
70
+ # Fallback to individual components
71
+ model_path = Path("/tmp/model.pkl")
72
+ vectorizer_path = Path("/tmp/vectorizer.pkl")
73
+
74
+ if model_path.exists() and vectorizer_path.exists():
75
+ self.model = joblib.load(model_path)
76
+ self.vectorizer = joblib.load(vectorizer_path)
77
+ logger.info("Loaded model components successfully")
78
+ else:
79
+ raise FileNotFoundError("No model files found")
80
+
81
+ # Load metadata
82
+ metadata_path = Path("/tmp/metadata.json")
83
  if metadata_path.exists():
84
+ with open(metadata_path, 'r') as f:
85
+ self.model_metadata = json.load(f)
86
+ logger.info(
87
+ f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
 
 
 
 
 
 
88
 
 
 
 
89
  self.health_status = "healthy"
90
  self.last_health_check = datetime.now()
 
91
 
92
  except Exception as e:
93
+ logger.error(f"Failed to load model: {e}")
 
94
  self.health_status = "unhealthy"
95
  self.model = None
96
  self.vectorizer = None
97
  self.pipeline = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  def predict(self, text: str) -> tuple[str, float]:
100
+ """Make prediction with error handling"""
 
 
 
 
101
  try:
102
  if self.pipeline:
103
  # Use pipeline for prediction
104
  prediction = self.pipeline.predict([text])[0]
105
  probabilities = self.pipeline.predict_proba([text])[0]
 
 
106
  elif self.model and self.vectorizer:
107
  # Use individual components
108
  X = self.vectorizer.transform([text])
109
  prediction = self.model.predict(X)[0]
110
  probabilities = self.model.predict_proba(X)[0]
 
 
111
  else:
112
  raise ValueError("No model available for prediction")
113
 
114
+ # Get confidence score
115
+ confidence = float(probabilities[prediction])
 
 
 
 
 
116
 
117
  # Convert prediction to readable format
118
  label = "Fake" if prediction == 1 else "Real"
 
120
  return label, confidence
121
 
122
  except Exception as e:
123
+ logger.error(f"Prediction failed: {e}")
124
  raise HTTPException(
125
  status_code=500,
126
  detail=f"Prediction failed: {str(e)}"
127
  )
128
 
129
  def health_check(self) -> Dict[str, Any]:
130
+ """Perform health check"""
131
  try:
132
  # Test prediction with sample text
133
  test_text = "This is a test article for health check purposes."
 
142
  "model_available": self.model is not None,
143
  "vectorizer_available": self.vectorizer is not None,
144
  "pipeline_available": self.pipeline is not None,
145
+ "test_prediction": {"label": label, "confidence": confidence}
 
 
 
 
 
 
 
 
146
  }
147
 
148
  except Exception as e:
 
155
  "error": str(e),
156
  "model_available": self.model is not None,
157
  "vectorizer_available": self.vectorizer is not None,
158
+ "pipeline_available": self.pipeline is not None
 
 
 
 
 
 
 
 
159
  }
160
 
161
 
 
163
  model_manager = ModelManager()
164
 
165
 
 
 
 
166
  @asynccontextmanager
167
  async def lifespan(app: FastAPI):
168
+ """Manage application lifespan"""
169
+ logger.info("Starting FastAPI application...")
170
 
 
 
 
171
  # Startup tasks
172
  model_manager.load_model()
173
 
 
177
  yield
178
 
179
  # Shutdown tasks
180
+ logger.info("Shutting down FastAPI application...")
 
181
 
182
  # Create FastAPI app
183
  app = FastAPI(
184
  title="Fake News Detection API",
185
+ description="Production-ready API for fake news detection with comprehensive monitoring and security features",
186
+ version="2.0.0",
187
  docs_url="/docs",
188
  redoc_url="/redoc",
189
  lifespan=lifespan
 
203
  allowed_hosts=["*"] # Configure appropriately for production
204
  )
205
 
206
+ # Request/Response models
207
+
208
 
 
 
 
209
  class PredictionRequest(BaseModel):
210
  text: str = Field(..., min_length=1, max_length=10000,
211
  description="Text to analyze for fake news detection")
 
272
  system_health: Dict[str, Any]
273
  api_health: Dict[str, Any]
274
 
275
+ # Rate limiting
276
+
277
 
 
 
 
278
  async def rate_limit_check(request: Request):
279
  """Check rate limits"""
280
  client_ip = request.client.host
 
296
  # Add current request
297
  rate_limit_storage[client_ip].append(current_time)
298
 
299
+ # Logging middleware
300
+
301
 
302
  @app.middleware("http")
303
  async def log_requests(request: Request, call_next):
 
321
 
322
  return response
323
 
324
+ # Error handlers
325
+
326
 
 
 
 
327
  @app.exception_handler(HTTPException)
328
  async def http_exception_handler(request: Request, exc: HTTPException):
329
  """Handle HTTP exceptions"""
 
360
  content=error_data
361
  )
362
 
363
+ # Background tasks
364
+
365
 
 
 
 
366
  async def periodic_health_check():
367
+ """Periodic health check"""
368
  while True:
369
  try:
370
  await asyncio.sleep(300) # Check every 5 minutes
371
  health_status = model_manager.health_check()
372
 
373
  if health_status["status"] == "unhealthy":
374
+ logger.warning(
375
+ "Model health check failed, attempting to reload...")
376
  model_manager.load_model()
377
 
378
  except Exception as e:
379
+ logger.error(f"Periodic health check failed: {e}")
380
+
381
+ # API Routes
382
 
383
 
 
 
 
384
  @app.get("/", response_model=Dict[str, str])
385
  async def root():
386
+ """Root endpoint"""
387
  return {
388
+ "message": "Fake News Detection API",
389
+ "version": "2.0.0",
 
390
  "documentation": "/docs",
391
+ "health_check": "/health"
 
 
 
 
 
392
  }
393
 
394
 
 
446
  except HTTPException:
447
  raise
448
  except Exception as e:
449
+ logger.error(f"Prediction failed: {e}")
450
  raise HTTPException(
451
  status_code=500,
452
  detail=f"Prediction failed: {str(e)}"
 
494
  predictions.append(prediction)
495
 
496
  except Exception as e:
497
+ logger.error(f"Batch prediction failed for text: {e}")
498
  # Continue with other texts
499
  continue
500
 
 
526
  except HTTPException:
527
  raise
528
  except Exception as e:
529
+ logger.error(f"Batch prediction failed: {e}")
530
  raise HTTPException(
531
  status_code=500,
532
  detail=f"Batch prediction failed: {str(e)}"
 
536
  @app.get("/health", response_model=HealthResponse)
537
  async def health_check():
538
  """
539
+ Comprehensive health check endpoint
540
 
541
  - **returns**: Detailed health status of the API and model
542
  """
 
571
  )
572
 
573
  except Exception as e:
574
+ logger.error(f"Health check failed: {e}")
575
  return HealthResponse(
576
  status="unhealthy",
577
  timestamp=datetime.now().isoformat(),
 
584
  @app.get("/metrics")
585
  async def get_metrics():
586
  """
587
+ Get API metrics
588
 
589
  - **returns**: Usage statistics and performance metrics
590
  """
 
600
  "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
601
  "model_health": model_manager.health_status,
602
  "last_health_check": model_manager.last_health_check.isoformat() if model_manager.last_health_check else None,
 
 
 
 
 
 
 
 
 
 
603
  "timestamp": datetime.now().isoformat()
604
  }
605
 
606
  return metrics
607
 
608
  except Exception as e:
609
+ logger.error(f"Metrics retrieval failed: {e}")
610
  raise HTTPException(
611
  status_code=500,
612
  detail=f"Metrics retrieval failed: {str(e)}"
 
616
  @app.post("/model/reload")
617
  async def reload_model():
618
  """
619
+ Reload the ML model
620
 
621
  - **returns**: Status of model reload operation
622
  """
623
  try:
624
+ logger.info("Manual model reload requested")
625
  model_manager.load_model()
626
 
627
  return {
 
629
  "message": "Model reloaded successfully",
630
  "model_health": model_manager.health_status,
631
  "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
 
 
 
 
 
 
 
632
  "timestamp": datetime.now().isoformat()
633
  }
634
 
635
  except Exception as e:
636
+ logger.error(f"Model reload failed: {e}")
637
  raise HTTPException(
638
  status_code=500,
639
  detail=f"Model reload failed: {str(e)}"
640
  )
641
 
642
+ # Background task functions
643
+
644
 
 
 
 
645
  async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
646
+ """Log prediction details"""
647
  try:
648
  log_entry = {
649
  "timestamp": datetime.now().isoformat(),
 
655
  "text_hash": hashlib.md5(text.encode()).hexdigest()
656
  }
657
 
658
+ # Save to log file
659
+ log_file = Path("/tmp/prediction_log.json")
660
 
661
  # Load existing logs
662
  logs = []
 
680
  await f.write(json.dumps(logs, indent=2))
681
 
682
  except Exception as e:
683
+ logger.error(f"Failed to log prediction: {e}")
684
 
685
 
686
  async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
 
696
  "success_rate": successful_predictions / total_texts if total_texts > 0 else 0
697
  }
698
 
699
+ logger.info(f"Batch prediction logged: {json.dumps(log_entry)}")
700
 
701
  except Exception as e:
702
+ logger.error(f"Failed to log batch prediction: {e}")
703
 
704
+ # Custom OpenAPI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
 
706
 
 
 
 
707
  def custom_openapi():
708
  if app.openapi_schema:
709
  return app.openapi_schema
710
 
711
  openapi_schema = get_openapi(
712
+ title="Fake News Detection API",
713
+ version="2.0.0",
714
+ description="Production-ready API for fake news detection with comprehensive monitoring and security features",
 
 
 
 
 
 
 
 
 
 
 
 
715
  routes=app.routes,
716
  )
717
 
 
730
 
731
  app.openapi = custom_openapi
732
 
 
 
 
 
733
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  uvicorn.run(
735
  "fastapi_server:app",
736
+ host="127.0.0.1",
737
  port=8000,
738
  log_level="info",
739
  reload=False,
740
  access_log=True
741
+ )