Commit dce7bfe
Parent(s): b8b6cad

Update app/fastapi_server.py
Reverted previous working version.

app/fastapi_server.py CHANGED (+88 -418)
@@ -1,5 +1,4 @@
-
-import os
+import json
 import time
 import joblib
 import logging
@@ -24,59 +23,12 @@ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from fastapi import FastAPI, HTTPException, Depends, Request, BackgroundTasks, status
 
 
-# =============================================================================
-# CENTRALIZED PATH CONFIGURATION - FIXES THE CRITICAL BUG
-# =============================================================================
-class PathConfig:
-    """Centralized path management to ensure consistency across all components"""
-
-    # Environment detection
-    if os.getenv("HF_SPACES_BUILD") == "1" or os.getenv("SPACE_ID"):
-        BASE_DIR = Path("/app/persistent")
-        ENVIRONMENT = "huggingface_spaces"
-    else:
-        BASE_DIR = Path("/tmp")
-        ENVIRONMENT = "local"
-
-    # Base directories
-    DATA_DIR = BASE_DIR / "data"
-    MODEL_DIR = BASE_DIR / "model"
-    LOGS_DIR = BASE_DIR / "logs"
-
-    # Model files - CONSISTENT PATHS (matches train.py)
-    MODEL_FILE = MODEL_DIR / "model.pkl"  # ✅ /tmp/model/model.pkl
-    VECTORIZER_FILE = MODEL_DIR / "vectorizer.pkl"  # ✅ /tmp/model/vectorizer.pkl
-    PIPELINE_FILE = MODEL_DIR / "pipeline.pkl"  # ✅ /tmp/model/pipeline.pkl
-    METADATA_FILE = BASE_DIR / "metadata.json"  # ✅ /tmp/metadata.json
-
-    # Log files
-    SERVER_LOG = LOGS_DIR / "fastapi_server.log"
-    PREDICTION_LOG = LOGS_DIR / "prediction_log.json"
-
-    @classmethod
-    def ensure_directories(cls):
-        """Create all required directories with proper permissions"""
-        for attr_name in dir(cls):
-            attr = getattr(cls, attr_name)
-            if isinstance(attr, Path) and attr_name.endswith('_DIR'):
-                attr.mkdir(parents=True, exist_ok=True, mode=0o755)
-
-        # Ensure log directory exists
-        cls.LOGS_DIR.mkdir(parents=True, exist_ok=True, mode=0o755)
-
-
-# Initialize directories at startup
-PathConfig.ensure_directories()
-
-
-# =============================================================================
-# ENHANCED LOGGING CONFIGURATION
-# =============================================================================
+# Configure logging
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(
+    format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.FileHandler(PathConfig.SERVER_LOG),
+        logging.FileHandler('/tmp/fastapi_server.log'),
        logging.StreamHandler()
     ]
 )
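Note: the PathConfig class removed above was meant to be a single source of truth for artifact paths, shared between training and serving; the revert returns to hard-coded /tmp paths. A minimal sketch of the idea (the train.py usage is hypothetical, inferred from the "matches train.py" comment rather than shown in this commit):

    from pathlib import Path

    class PathConfig:
        BASE_DIR = Path("/tmp")
        MODEL_DIR = BASE_DIR / "model"
        PIPELINE_FILE = MODEL_DIR / "pipeline.pkl"

    # Hypothetical training side: write to the shared constant...
    #   joblib.dump(pipeline, PathConfig.PIPELINE_FILE)
    # ...and the server reads the same constant, so the two sides cannot
    # drift apart the way "/tmp/pipeline.pkl" vs "/tmp/model/pipeline.pkl" can.
    #   pipeline = joblib.load(PathConfig.PIPELINE_FILE)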
@@ -89,57 +41,8 @@ security = HTTPBearer(auto_error=False)
 rate_limit_storage = defaultdict(list)
 
 
-# =============================================================================
-# CIRCUIT BREAKER FOR RESILIENCE
-# =============================================================================
-class CircuitBreaker:
-    """Circuit breaker pattern for model operations"""
-
-    def __init__(self, failure_threshold: int = 5, recovery_timeout: int = 60):
-        self.failure_threshold = failure_threshold
-        self.recovery_timeout = recovery_timeout
-        self.failure_count = 0
-        self.last_failure_time = None
-        self.state = 'CLOSED'  # CLOSED, OPEN, HALF_OPEN
-
-    def call(self, func, *args, **kwargs):
-        """Execute function with circuit breaker protection"""
-        if self.state == 'OPEN':
-            if time.time() - self.last_failure_time > self.recovery_timeout:
-                self.state = 'HALF_OPEN'
-                logger.info("Circuit breaker transitioning to HALF_OPEN")
-            else:
-                raise Exception("Circuit breaker is OPEN - service unavailable")
-
-        try:
-            result = func(*args, **kwargs)
-            self.on_success()
-            return result
-        except Exception as e:
-            self.on_failure()
-            raise
-
-    def on_success(self):
-        """Reset circuit breaker on successful operation"""
-        self.failure_count = 0
-        if self.state == 'HALF_OPEN':
-            self.state = 'CLOSED'
-            logger.info("Circuit breaker reset to CLOSED")
-
-    def on_failure(self):
-        """Handle failure and potentially open circuit"""
-        self.failure_count += 1
-        self.last_failure_time = time.time()
-        if self.failure_count >= self.failure_threshold:
-            self.state = 'OPEN'
-            logger.error(f"Circuit breaker OPENED after {self.failure_count} failures")
-
-
-# =============================================================================
-# ENHANCED MODEL MANAGER WITH FIXED PATHS
-# =============================================================================
 class ModelManager:
-    """
+    """Manages model loading and health checks"""
 
     def __init__(self):
         self.model = None
@@ -148,139 +51,68 @@ class ModelManager:
         self.model_metadata = {}
         self.last_health_check = None
         self.health_status = "unknown"
-        self.circuit_breaker = CircuitBreaker()
         self.load_model()
 
     def load_model(self):
-        """Load model with
+        """Load model with comprehensive error handling"""
         try:
-            logger.info("Loading ML model
-
-            #
-            pipeline_path = PathConfig.PIPELINE_FILE
-            model_path = PathConfig.MODEL_FILE
-            vectorizer_path = PathConfig.VECTORIZER_FILE  # ✅ /tmp/model/vectorizer.pkl
-            metadata_path = PathConfig.METADATA_FILE  # ✅ /tmp/metadata.json
-
-            logger.info(f"Looking for pipeline at: {pipeline_path}")
-            logger.info(f"Looking for model at: {model_path}")
-            logger.info(f"Looking for vectorizer at: {vectorizer_path}")
-
-            # Try to load pipeline first (preferred method)
+            logger.info("Loading ML model...")
+
+            # Try to load pipeline first (preferred)
+            # pipeline_path = Path("/tmp/model/pipeline.pkl")
+            pipeline_path = Path("/tmp/pipeline.pkl")
             if pipeline_path.exists():
-                logger.info("✅ Found pipeline file, loading...")
                 self.pipeline = joblib.load(pipeline_path)
-
-
-
-                self.model = self.pipeline.named_steps.get('model')
-                self.vectorizer = self.pipeline.named_steps.get('vectorize')
-                logger.info("✅ Extracted model and vectorizer from pipeline")
-            else:
-                # Pipeline might be the complete model
-                logger.info("✅ Pipeline loaded as complete model")
-
-            logger.info("✅ Loaded model pipeline successfully")
-
-            elif model_path.exists() and vectorizer_path.exists():
-                # Fallback to individual components
-                logger.info("📦 Loading individual model components...")
-                self.model = joblib.load(model_path)
-                self.vectorizer = joblib.load(vectorizer_path)
-                logger.info("✅ Loaded individual model components successfully")
-
+                self.model = self.pipeline.named_steps.get('model')
+                self.vectorizer = self.pipeline.named_steps.get('vectorize')
+                logger.info("Loaded model pipeline successfully")
             else:
-                #
-
-
-
-
-
-
-
-
-
+                # Fallback to individual components
+                model_path = Path("/tmp/model.pkl")
+                vectorizer_path = Path("/tmp/vectorizer.pkl")
+
+                if model_path.exists() and vectorizer_path.exists():
+                    self.model = joblib.load(model_path)
+                    self.vectorizer = joblib.load(vectorizer_path)
+                    logger.info("Loaded model components successfully")
+                else:
+                    raise FileNotFoundError("No model files found")
+
+            # Load metadata
+            metadata_path = Path("/tmp/metadata.json")
             if metadata_path.exists():
-
-
-
-
-            except Exception as e:
-                logger.warning(f"Could not load metadata: {e}")
-                self.model_metadata = {"model_version": "unknown"}
-            else:
-                logger.warning(f"Metadata file not found at {metadata_path}")
-                self.model_metadata = {"model_version": "unknown"}
+                with open(metadata_path, 'r') as f:
+                    self.model_metadata = json.load(f)
+                logger.info(
+                    f"Loaded model metadata: {self.model_metadata.get('model_version', 'Unknown')}")
 
-            # Verify model is functional
-            self._verify_model_functionality()
-
             self.health_status = "healthy"
             self.last_health_check = datetime.now()
-            logger.info("🎉 Model loaded successfully and is functional!")
 
         except Exception as e:
-            logger.error(f"
-            logger.error(f"❌ Exception details: {traceback.format_exc()}")
+            logger.error(f"Failed to load model: {e}")
             self.health_status = "unhealthy"
             self.model = None
             self.vectorizer = None
             self.pipeline = None
-
-    def _verify_model_functionality(self):
-        """Verify that the loaded model can make predictions"""
-        test_text = "This is a test article for verification purposes."
-
-        try:
-            if self.pipeline:
-                # Test pipeline prediction
-                prediction = self.pipeline.predict([test_text])
-                probabilities = self.pipeline.predict_proba([test_text])
-                logger.info("✅ Pipeline prediction test successful")
-            elif self.model and self.vectorizer:
-                # Test individual components
-                X = self.vectorizer.transform([test_text])
-                prediction = self.model.predict(X)
-                probabilities = self.model.predict_proba(X)
-                logger.info("✅ Individual components prediction test successful")
-            else:
-                raise ValueError("No functional model components available")
-
-        except Exception as e:
-            logger.error(f"❌ Model functionality verification failed: {e}")
-            raise
 
     def predict(self, text: str) -> tuple[str, float]:
-        """Make prediction with
-        return self.circuit_breaker.call(self._predict_internal, text)
-
-    def _predict_internal(self, text: str) -> tuple[str, float]:
-        """Internal prediction method"""
+        """Make prediction with error handling"""
         try:
             if self.pipeline:
                 # Use pipeline for prediction
                 prediction = self.pipeline.predict([text])[0]
                 probabilities = self.pipeline.predict_proba([text])[0]
-                logger.debug("Used pipeline for prediction")
-
             elif self.model and self.vectorizer:
                 # Use individual components
                 X = self.vectorizer.transform([text])
                 prediction = self.model.predict(X)[0]
                 probabilities = self.model.predict_proba(X)[0]
-                logger.debug("Used individual components for prediction")
-
             else:
                 raise ValueError("No model available for prediction")
 
-            # Get confidence score
-
-                # Multi-class probabilities
-                confidence = float(np.max(probabilities))
-            else:
-                # Binary classification
-                confidence = float(probabilities[1] if len(probabilities) > 1 else probabilities[0])
+            # Get confidence score
+            confidence = float(probabilities[prediction])
 
             # Convert prediction to readable format
             label = "Fake" if prediction == 1 else "Real"
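Note on the confidence change above: the removed branch used np.max(probabilities) for multi-class output and probabilities[1] for binary output, while the reinstated line indexes the probability vector by the predicted label. The two agree for a scikit-learn-style binary classifier whose classes_ are [0, 1], because the label then doubles as the column index of the winning class:

    import numpy as np

    probabilities = np.array([0.2, 0.8])  # predict_proba row for one text
    prediction = 1                        # predicted class label

    confidence = float(probabilities[prediction])  # reinstated approach
    assert confidence == float(np.max(probabilities)) == 0.8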
@@ -288,14 +120,14 @@ class ModelManager:
             return label, confidence
 
         except Exception as e:
-            logger.error(f"
+            logger.error(f"Prediction failed: {e}")
             raise HTTPException(
                 status_code=500,
                 detail=f"Prediction failed: {str(e)}"
             )
 
     def health_check(self) -> Dict[str, Any]:
-        """
+        """Perform health check"""
         try:
             # Test prediction with sample text
             test_text = "This is a test article for health check purposes."
@@ -310,15 +142,7 @@ class ModelManager:
                 "model_available": self.model is not None,
                 "vectorizer_available": self.vectorizer is not None,
                 "pipeline_available": self.pipeline is not None,
-                "circuit_breaker_state": self.circuit_breaker.state,
-                "circuit_breaker_failures": self.circuit_breaker.failure_count,
-                "test_prediction": {"label": label, "confidence": confidence},
-                "model_paths": {
-                    "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
-                    "model_exists": PathConfig.MODEL_FILE.exists(),
-                    "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
-                    "metadata_exists": PathConfig.METADATA_FILE.exists()
-                }
+                "test_prediction": {"label": label, "confidence": confidence}
             }
 
         except Exception as e:
@@ -331,15 +155,7 @@ class ModelManager:
                 "error": str(e),
                 "model_available": self.model is not None,
                 "vectorizer_available": self.vectorizer is not None,
-                "pipeline_available": self.pipeline is not None
-                "circuit_breaker_state": self.circuit_breaker.state,
-                "circuit_breaker_failures": self.circuit_breaker.failure_count,
-                "model_paths": {
-                    "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
-                    "model_exists": PathConfig.MODEL_FILE.exists(),
-                    "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
-                    "metadata_exists": PathConfig.METADATA_FILE.exists()
-                }
+                "pipeline_available": self.pipeline is not None
             }
 
 
@@ -347,17 +163,11 @@ class ModelManager:
 model_manager = ModelManager()
 
 
-# =============================================================================
-# FASTAPI APPLICATION SETUP
-# =============================================================================
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Manage application lifespan
-    logger.info("
+    """Manage application lifespan"""
+    logger.info("Starting FastAPI application...")
 
-    # Ensure directories exist
-    PathConfig.ensure_directories()
-
     # Startup tasks
     model_manager.load_model()
 
@@ -367,14 +177,13 @@ async def lifespan(app: FastAPI):
     yield
 
     # Shutdown tasks
-    logger.info("
-
+    logger.info("Shutting down FastAPI application...")
 
 # Create FastAPI app
 app = FastAPI(
     title="Fake News Detection API",
-    description="Production-ready API for fake news detection with
-    version="2.
+    description="Production-ready API for fake news detection with comprehensive monitoring and security features",
+    version="2.0.0",
     docs_url="/docs",
     redoc_url="/redoc",
     lifespan=lifespan
@@ -394,10 +203,9 @@ app.add_middleware(
     allowed_hosts=["*"]  # Configure appropriately for production
 )
 
+# Request/Response models
+
 
-# =============================================================================
-# REQUEST/RESPONSE MODELS (UNCHANGED)
-# =============================================================================
 class PredictionRequest(BaseModel):
     text: str = Field(..., min_length=1, max_length=10000,
                       description="Text to analyze for fake news detection")
@@ -464,10 +272,9 @@ class HealthResponse(BaseModel):
     system_health: Dict[str, Any]
     api_health: Dict[str, Any]
 
+# Rate limiting
+
 
-# =============================================================================
-# MIDDLEWARE AND RATE LIMITING (UNCHANGED)
-# =============================================================================
 async def rate_limit_check(request: Request):
     """Check rate limits"""
     client_ip = request.client.host
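Note: rate limiting here is a per-IP sliding window; rate_limit_storage maps each client IP to a list of request timestamps, and each request appends the current time. A minimal self-contained sketch of that pattern (the window size and request cap are illustrative, not values from this file):

    import time
    from collections import defaultdict

    rate_limit_storage = defaultdict(list)  # client_ip -> request timestamps
    WINDOW_SECONDS = 60   # illustrative
    MAX_REQUESTS = 100    # illustrative

    def allow_request(client_ip: str) -> bool:
        now = time.time()
        # Drop timestamps outside the window, then check the remaining count.
        rate_limit_storage[client_ip] = [
            t for t in rate_limit_storage[client_ip] if now - t < WINDOW_SECONDS
        ]
        if len(rate_limit_storage[client_ip]) >= MAX_REQUESTS:
            return False
        rate_limit_storage[client_ip].append(now)
        return True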
@@ -489,6 +296,8 @@ async def rate_limit_check(request: Request):
     # Add current request
     rate_limit_storage[client_ip].append(current_time)
 
+# Logging middleware
+
 
 @app.middleware("http")
 async def log_requests(request: Request, call_next):
@@ -512,10 +321,9 @@ async def log_requests(request: Request, call_next):
 
     return response
 
+# Error handlers
+
 
-# =============================================================================
-# ERROR HANDLERS (UNCHANGED)
-# =============================================================================
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request: Request, exc: HTTPException):
     """Handle HTTP exceptions"""
@@ -552,42 +360,35 @@ async def general_exception_handler(request: Request, exc: Exception):
         content=error_data
     )
 
+# Background tasks
+
 
-# =============================================================================
-# BACKGROUND TASKS
-# =============================================================================
 async def periodic_health_check():
-    """
+    """Periodic health check"""
     while True:
         try:
             await asyncio.sleep(300)  # Check every 5 minutes
             health_status = model_manager.health_check()
 
             if health_status["status"] == "unhealthy":
-                logger.warning(
+                logger.warning(
+                    "Model health check failed, attempting to reload...")
                 model_manager.load_model()
 
         except Exception as e:
-            logger.error(f"
+            logger.error(f"Periodic health check failed: {e}")
+
+# API Routes
 
 
-# =============================================================================
-# API ROUTES (ENHANCED WITH BETTER ERROR HANDLING)
-# =============================================================================
 @app.get("/", response_model=Dict[str, str])
 async def root():
-    """Root endpoint
+    """Root endpoint"""
     return {
-        "message": "Fake News Detection API
-        "version": "2.
-        "status": "Path management issues resolved",
+        "message": "Fake News Detection API",
+        "version": "2.0.0",
         "documentation": "/docs",
-        "health_check": "/health"
-        "model_path_info": {
-            "pipeline_path": str(PathConfig.PIPELINE_FILE),
-            "model_path": str(PathConfig.MODEL_FILE),
-            "vectorizer_path": str(PathConfig.VECTORIZER_FILE)
-        }
+        "health_check": "/health"
     }
 
 
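Note: periodic_health_check loops with await asyncio.sleep(300), but this diff does not show where the loop is started; with FastAPI's lifespan hook it would typically be scheduled as a background task. A sketch of that wiring (the create_task call is an assumption, not code from this commit):

    import asyncio
    from contextlib import asynccontextmanager
    from fastapi import FastAPI

    async def periodic_health_check():
        while True:                    # stand-in for the loop in this file
            await asyncio.sleep(300)

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        task = asyncio.create_task(periodic_health_check())  # start the loop
        yield
        task.cancel()                  # stop it on shutdown

    app = FastAPI(lifespan=lifespan)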
@@ -645,7 +446,7 @@ async def predict(
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Prediction failed: {e}")
         raise HTTPException(
             status_code=500,
             detail=f"Prediction failed: {str(e)}"
@@ -693,7 +494,7 @@ async def predict_batch(
                 predictions.append(prediction)
 
             except Exception as e:
-                logger.error(f"
+                logger.error(f"Batch prediction failed for text: {e}")
                 # Continue with other texts
                 continue
 
@@ -725,7 +526,7 @@ async def predict_batch(
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Batch prediction failed: {e}")
         raise HTTPException(
             status_code=500,
             detail=f"Batch prediction failed: {str(e)}"
@@ -735,7 +536,7 @@ async def predict_batch(
 @app.get("/health", response_model=HealthResponse)
 async def health_check():
     """
-
+    Comprehensive health check endpoint
 
     - **returns**: Detailed health status of the API and model
     """
@@ -770,7 +571,7 @@ async def health_check():
         )
 
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Health check failed: {e}")
         return HealthResponse(
             status="unhealthy",
             timestamp=datetime.now().isoformat(),
@@ -783,7 +584,7 @@ async def health_check():
 @app.get("/metrics")
 async def get_metrics():
     """
-    Get API metrics
+    Get API metrics
 
     - **returns**: Usage statistics and performance metrics
     """
@@ -799,23 +600,13 @@ async def get_metrics():
             "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
             "model_health": model_manager.health_status,
             "last_health_check": model_manager.last_health_check.isoformat() if model_manager.last_health_check else None,
-            "circuit_breaker": {
-                "state": model_manager.circuit_breaker.state,
-                "failure_count": model_manager.circuit_breaker.failure_count
-            },
-            "path_status": {
-                "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
-                "model_exists": PathConfig.MODEL_FILE.exists(),
-                "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
-                "metadata_exists": PathConfig.METADATA_FILE.exists()
-            },
             "timestamp": datetime.now().isoformat()
         }
 
         return metrics
 
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Metrics retrieval failed: {e}")
         raise HTTPException(
             status_code=500,
             detail=f"Metrics retrieval failed: {str(e)}"
@@ -825,12 +616,12 @@ async def get_metrics():
 @app.post("/model/reload")
 async def reload_model():
     """
-    Reload the ML model
+    Reload the ML model
 
     - **returns**: Status of model reload operation
     """
     try:
-        logger.info("
+        logger.info("Manual model reload requested")
         model_manager.load_model()
 
         return {
@@ -838,29 +629,21 @@ async def reload_model():
             "message": "Model reloaded successfully",
             "model_health": model_manager.health_status,
             "model_version": model_manager.model_metadata.get('model_version', 'unknown'),
-            "circuit_breaker_reset": model_manager.circuit_breaker.state,
-            "path_verification": {
-                "pipeline_exists": PathConfig.PIPELINE_FILE.exists(),
-                "model_exists": PathConfig.MODEL_FILE.exists(),
-                "vectorizer_exists": PathConfig.VECTORIZER_FILE.exists(),
-                "metadata_exists": PathConfig.METADATA_FILE.exists()
-            },
             "timestamp": datetime.now().isoformat()
         }
 
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Model reload failed: {e}")
         raise HTTPException(
             status_code=500,
             detail=f"Model reload failed: {str(e)}"
         )
 
+# Background task functions
+
 
-# =============================================================================
-# BACKGROUND TASK FUNCTIONS (ENHANCED)
-# =============================================================================
 async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
-    """Log prediction details
+    """Log prediction details"""
     try:
         log_entry = {
             "timestamp": datetime.now().isoformat(),
@@ -872,8 +655,8 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
             "text_hash": hashlib.md5(text.encode()).hexdigest()
         }
 
-        # Save to log file
-        log_file = PathConfig.PREDICTION_LOG
+        # Save to log file
+        log_file = Path("/tmp/prediction_log.json")
 
         # Load existing logs
         logs = []
@@ -897,7 +680,7 @@ async def log_prediction(text: str, prediction: str, confidence: float, client_ip: str, processing_time: float):
             await f.write(json.dumps(logs, indent=2))
 
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Failed to log prediction: {e}")
 
 
 async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
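Note: log_prediction stores an MD5 digest of the text rather than the text itself, and rewrites the whole JSON array on each call (the await f.write(...) line suggests an async file handle such as aiofiles, which is not visible in this diff). A compact synchronous sketch of the same read-modify-write pattern (the max_entries cap is illustrative):

    import hashlib
    import json
    from pathlib import Path

    def append_log_entry(log_file: Path, entry: dict, max_entries: int = 1000) -> None:
        # Load existing entries, append the new one, rewrite the file.
        logs = json.loads(log_file.read_text()) if log_file.exists() else []
        logs.append(entry)
        log_file.write_text(json.dumps(logs[-max_entries:], indent=2))

    entry = {
        "prediction": "Fake",
        "confidence": 0.97,
        # Store a digest instead of the raw text, as the server does.
        "text_hash": hashlib.md5(b"some article text").hexdigest(),
    }
    append_log_entry(Path("/tmp/prediction_log.json"), entry)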
@@ -913,114 +696,22 @@ async def log_batch_prediction(total_texts: int, successful_predictions: int, client_ip: str, processing_time: float):
             "success_rate": successful_predictions / total_texts if total_texts > 0 else 0
         }
 
-        logger.info(f"
+        logger.info(f"Batch prediction logged: {json.dumps(log_entry)}")
 
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Failed to log batch prediction: {e}")
 
-
-# =============================================================================
-# ENHANCED PATH DIAGNOSTICS ENDPOINT
-# =============================================================================
-@app.get("/debug/paths")
-async def debug_paths():
-    """
-    Debug endpoint to check all file paths and their existence
-
-    - **returns**: Detailed path information for troubleshooting
-    """
-    try:
-        path_info = {
-            "base_directories": {
-                "BASE_DIR": {
-                    "path": str(PathConfig.BASE_DIR),
-                    "exists": PathConfig.BASE_DIR.exists(),
-                    "is_dir": PathConfig.BASE_DIR.is_dir() if PathConfig.BASE_DIR.exists() else False
-                },
-                "MODEL_DIR": {
-                    "path": str(PathConfig.MODEL_DIR),
-                    "exists": PathConfig.MODEL_DIR.exists(),
-                    "is_dir": PathConfig.MODEL_DIR.is_dir() if PathConfig.MODEL_DIR.exists() else False
-                },
-                "DATA_DIR": {
-                    "path": str(PathConfig.DATA_DIR),
-                    "exists": PathConfig.DATA_DIR.exists(),
-                    "is_dir": PathConfig.DATA_DIR.is_dir() if PathConfig.DATA_DIR.exists() else False
-                },
-                "LOGS_DIR": {
-                    "path": str(PathConfig.LOGS_DIR),
-                    "exists": PathConfig.LOGS_DIR.exists(),
-                    "is_dir": PathConfig.LOGS_DIR.is_dir() if PathConfig.LOGS_DIR.exists() else False
-                }
-            },
-            "model_files": {
-                "PIPELINE_FILE": {
-                    "path": str(PathConfig.PIPELINE_FILE),
-                    "exists": PathConfig.PIPELINE_FILE.exists(),
-                    "size": PathConfig.PIPELINE_FILE.stat().st_size if PathConfig.PIPELINE_FILE.exists() else None
-                },
-                "MODEL_FILE": {
-                    "path": str(PathConfig.MODEL_FILE),
-                    "exists": PathConfig.MODEL_FILE.exists(),
-                    "size": PathConfig.MODEL_FILE.stat().st_size if PathConfig.MODEL_FILE.exists() else None
-                },
-                "VECTORIZER_FILE": {
-                    "path": str(PathConfig.VECTORIZER_FILE),
-                    "exists": PathConfig.VECTORIZER_FILE.exists(),
-                    "size": PathConfig.VECTORIZER_FILE.stat().st_size if PathConfig.VECTORIZER_FILE.exists() else None
-                },
-                "METADATA_FILE": {
-                    "path": str(PathConfig.METADATA_FILE),
-                    "exists": PathConfig.METADATA_FILE.exists(),
-                    "size": PathConfig.METADATA_FILE.stat().st_size if PathConfig.METADATA_FILE.exists() else None
-                }
-            },
-            "model_manager_status": {
-                "model_loaded": model_manager.model is not None,
-                "vectorizer_loaded": model_manager.vectorizer is not None,
-                "pipeline_loaded": model_manager.pipeline is not None,
-                "health_status": model_manager.health_status,
-                "circuit_breaker_state": model_manager.circuit_breaker.state
-            },
-            "directory_contents": {
-                "base_dir_files": [str(f) for f in PathConfig.BASE_DIR.iterdir()] if PathConfig.BASE_DIR.exists() else [],
-                "model_dir_files": [str(f) for f in PathConfig.MODEL_DIR.iterdir()] if PathConfig.MODEL_DIR.exists() else []
-            }
-        }
-
-        return path_info
-
-    except Exception as e:
-        logger.error(f"❌ Path debug failed: {e}")
-        return {
-            "error": str(e),
-            "timestamp": datetime.now().isoformat()
-        }
+# Custom OpenAPI
 
 
-# =============================================================================
-# CUSTOM OPENAPI CONFIGURATION
-# =============================================================================
 def custom_openapi():
     if app.openapi_schema:
         return app.openapi_schema
 
     openapi_schema = get_openapi(
-        title="Fake News Detection API
-        version="2.
-        description=""
-    Production-ready API for fake news detection with FIXED path management.
-
-    **Key Fixes:**
-    - ✅ Centralized path configuration
-    - ✅ Circuit breaker pattern for resilience
-    - ✅ Enhanced error handling and logging
-    - ✅ Path diagnostics endpoints for debugging
-
-    **Critical Bug Fix:**
-    The previous version had inconsistent paths between FastAPI server and training components.
-    This version uses consistent paths that match the training pipeline.
-    """,
+        title="Fake News Detection API",
+        version="2.0.0",
+        description="Production-ready API for fake news detection with comprehensive monitoring and security features",
         routes=app.routes,
     )
 
@@ -1039,33 +730,12 @@ def custom_openapi():
 
 app.openapi = custom_openapi
 
-
-# =============================================================================
-# APPLICATION STARTUP
-# =============================================================================
 if __name__ == "__main__":
-    # Final path verification before starting
-    logger.info("🔍 Performing final path verification...")
-
-    PathConfig.ensure_directories()
-
-    logger.info(f"📁 Model directory: {PathConfig.MODEL_DIR}")
-    logger.info(f"📁 Pipeline file path: {PathConfig.PIPELINE_FILE}")
-    logger.info(f"📁 Model file path: {PathConfig.MODEL_FILE}")
-    logger.info(f"📁 Vectorizer file path: {PathConfig.VECTORIZER_FILE}")
-
-    if PathConfig.MODEL_DIR.exists():
-        logger.info(f"✅ Model directory exists with {len(list(PathConfig.MODEL_DIR.iterdir()))} files")
-        for file in PathConfig.MODEL_DIR.iterdir():
-            logger.info(f"  📄 {file.name} ({file.stat().st_size} bytes)")
-    else:
-        logger.warning(f"⚠️ Model directory does not exist: {PathConfig.MODEL_DIR}")
-
     uvicorn.run(
         "fastapi_server:app",
-        host="
+        host="127.0.0.1",
         port=8000,
         log_level="info",
         reload=False,
         access_log=True
-    )
+    )
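Note: after the revert the server binds to 127.0.0.1:8000. A hedged client sketch for exercising it locally (the /predict path and request body are inferred from the PredictionRequest model and handler names above; the route decorators for the prediction endpoints are not visible in this diff):

    import requests  # assumes the requests package is installed

    # Health probe (the /health route is shown in the diff).
    print(requests.get("http://127.0.0.1:8000/health").json())

    # Single prediction; /predict is an assumption based on the predict()
    # handler and the PredictionRequest {"text": ...} schema.
    resp = requests.post(
        "http://127.0.0.1:8000/predict",
        json={"text": "Some article text to classify."},
    )
    print(resp.json())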