Update app/fastapi_server.py
app/fastapi_server.py  (+260 −129)
```diff
@@ -678,6 +678,57 @@ async def predict(
             detail="Model is not available. Please try again later."
         )
 
+    # NEW: Data validation before prediction
+    try:
+        from data.data_validator import DataValidator
+        from data.validation_schemas import ValidationLevel
+
+        validator = DataValidator()
+        validation_result = validator.validate_text(request.text)
+
+        # Log validation result
+        validation_entry = {
+            'timestamp': datetime.now().isoformat(),
+            'text_length': len(request.text),
+            'validation_level': validation_result.validation_level.value,
+            'quality_score': validation_result.quality_score,
+            'issues': [issue.dict() for issue in validation_result.issues],
+            'passed_validation': validation_result.validation_level != ValidationLevel.INVALID,
+            'client_ip': client_ip,
+            'user_agent': user_agent
+        }
+
+        # Save validation results
+        try:
+            validation_log_path = path_manager.get_logs_path("validation_log.json")
+            if validation_log_path.exists():
+                with open(validation_log_path, 'r') as f:
+                    validation_data = json.load(f)
+            else:
+                validation_data = []
+
+            validation_data.append(validation_entry)
+
+            # Keep only last 1000 entries
+            if len(validation_data) > 1000:
+                validation_data = validation_data[-1000:]
+
+            with open(validation_log_path, 'w') as f:
+                json.dump(validation_data, f, indent=2)
+        except Exception as e:
+            logger.warning(f"Could not save validation log: {e}")
+
+        # Block invalid inputs
+        if validation_result.validation_level == ValidationLevel.INVALID:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Input validation failed: {validation_result.issues[0].message if validation_result.issues else 'Invalid input'}"
+            )
+
+    except ImportError:
+        logger.warning("Data validation components not available, proceeding without validation")
+        validation_result = None
+
     # Prepare request data for routing
     request_data = {
         'client_id': client_ip,
```
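The hunk above only imports `DataValidator` and `ValidationLevel`; their definitions live in `data/data_validator.py` and `data/validation_schemas.py`, which are not part of this commit. A minimal sketch of the interface the endpoint appears to assume — names and fields are inferred from the call sites above, not from the actual modules:

```python
# Hypothetical sketch of the validator interface the /predict hunk relies on.
# Inferred from the call sites in the diff; the real data/ modules may differ.
from dataclasses import dataclass, field
from enum import Enum
from typing import List


class ValidationLevel(Enum):
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
    INVALID = "INVALID"


@dataclass
class ValidationIssue:
    message: str

    def dict(self) -> dict:  # matches the issue.dict() call in the diff
        return {"message": self.message}


@dataclass
class ValidationResult:
    validation_level: ValidationLevel
    quality_score: float  # 0.0-1.0; averaged and trended by the stats code
    issues: List[ValidationIssue] = field(default_factory=list)


class DataValidator:
    def validate_text(self, text: str) -> ValidationResult:
        # A real implementation would check length, language, encoding, etc.
        if not text.strip():
            return ValidationResult(ValidationLevel.INVALID, 0.0,
                                    [ValidationIssue("Empty input")])
        return ValidationResult(ValidationLevel.HIGH, 1.0)
```

The uppercase enum values match the later stats code, which looks up `breakdown.get('INVALID', 0)` and `breakdown.get('LOW', 0)` against `validation_result.validation_level.value`.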
```diff
@@ -738,48 +789,15 @@ async def predict(
         processing_time=processing_time
     )
 
-    #
-    validation_entry = {
-        'timestamp': datetime.now().isoformat(),
-        'text_length': len(request.text),
-        'prediction': label,
-        'confidence': confidence,
-        'validation_passed': confidence > 0.6,  # Define validation threshold
-        'quality_score': confidence,
-        'model_version': model_manager.model_metadata.get('model_version', 'unknown'),
-        'processing_time': processing_time,
-        'client_ip': client_ip,
-        'environment': environment
-    }
-
-    # Save to validation log
-    try:
-        validation_log_path = path_manager.get_logs_path("validation_log.json")
-        if validation_log_path.exists():
-            with open(validation_log_path, 'r') as f:
-                validation_data = json.load(f)
-        else:
-            validation_data = []
-
-        validation_data.append(validation_entry)
-
-        # Keep only last 1000 entries to prevent file from growing too large
-        if len(validation_data) > 1000:
-            validation_data = validation_data[-1000:]
-
-        with open(validation_log_path, 'w') as f:
-            json.dump(validation_data, f, indent=2)
-    except Exception as e:
-        logger.warning(f"Could not save validation log: {e}")
-
-    # Log prediction (background task)
+    # Log prediction (background task) - ENHANCED with validation info
    background_tasks.add_task(
-        log_prediction,
+        log_prediction_with_validation,
        request.text,
        label,
        confidence,
        client_ip,
-        processing_time
+        processing_time,
+        validation_result
    )
 
    return response
```
```diff
@@ -825,6 +843,50 @@ async def predict(
     )
 
 
+async def log_prediction_with_validation(text: str, prediction: str, confidence: float,
+                                         client_ip: str, processing_time: float,
+                                         validation_result=None):
+    """Enhanced logging function that includes validation data"""
+    try:
+        prediction_entry = {
+            'timestamp': datetime.now().isoformat(),
+            'prediction': prediction,
+            'confidence': confidence,
+            'processing_time': processing_time,
+            'client_ip': client_ip,
+            'text_length': len(text),
+            'text_preview': text[:100] + "..." if len(text) > 100 else text
+        }
+
+        # Add validation information if available
+        if validation_result:
+            prediction_entry.update({
+                'validation_level': validation_result.validation_level.value,
+                'quality_score': validation_result.quality_score,
+                'validation_issues_count': len(validation_result.issues)
+            })
+
+        prediction_log_path = path_manager.get_logs_path("prediction_log.json")
+
+        if prediction_log_path.exists():
+            with open(prediction_log_path, 'r') as f:
+                prediction_data = json.load(f)
+        else:
+            prediction_data = []
+
+        prediction_data.append(prediction_entry)
+
+        # Keep only last 1000 entries
+        if len(prediction_data) > 1000:
+            prediction_data = prediction_data[-1000:]
+
+        with open(prediction_log_path, 'w') as f:
+            json.dump(prediction_data, f, indent=2)
+
+    except Exception as e:
+        logger.error(f"Failed to log prediction: {e}")
+
+
 @app.post("/predict/batch", response_model=BatchPredictionResponse)
 async def predict_batch(
     request: BatchPredictionRequest,
```
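Both `validation_log.json` and `prediction_log.json` are rewritten in full on every request: load, append, truncate to 1000 entries, dump. Under concurrent requests, two background tasks can interleave those steps and drop entries or corrupt the file. A sketch of an append-only JSON Lines variant that avoids the rewrite — this assumes the same `path_manager` helper as the diff and is an alternative design, not what the commit does:

```python
# Sketch: append-only JSON Lines logging instead of read-modify-write.
# Assumes the same path_manager helper as the diff; not part of this commit.
import json
from datetime import datetime


def append_log_entry(path_manager, entry: dict,
                     filename: str = "prediction_log.jsonl") -> None:
    """Append one entry per line; O(1) per write, no whole-file rewrite."""
    log_path = path_manager.get_logs_path(filename)
    entry.setdefault('timestamp', datetime.now().isoformat())
    with open(log_path, 'a') as f:
        f.write(json.dumps(entry) + "\n")


def read_last_entries(path_manager, limit: int = 1000,
                      filename: str = "prediction_log.jsonl") -> list:
    """Read back the newest entries; the size cap moves to read time."""
    log_path = path_manager.get_logs_path(filename)
    if not log_path.exists():
        return []
    with open(log_path) as f:
        lines = f.readlines()[-limit:]
    return [json.loads(line) for line in lines]
```

Small single-line appends are effectively atomic on POSIX filesystems, so concurrent background tasks no longer clobber each other's writes; the trade-off is that the 1000-entry cap is enforced when reading rather than on disk.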
```diff
@@ -1338,79 +1400,119 @@ async def get_metrics():
     )
 
 def get_validation_stats():
-    [72 deleted lines: previous implementation, not rendered in this view]
+    """Get validation statistics from actual validation logs"""
+    try:
+        stats = {
+            'last_updated': datetime.now().isoformat(),
+            'total_validations': 0,
+            'total_articles': 0,
+            'total_valid_articles': 0,
+            'average_quality_score': 0.0,
+            'validation_breakdown': {},
+            'source_statistics': {},
+            'validation_history': [],
+            'quality_trends': []
+        }
+
+        # Load actual validation data
+        validation_log_path = path_manager.get_logs_path("validation_log.json")
+        if validation_log_path.exists():
+            with open(validation_log_path, 'r') as f:
+                validation_data = json.load(f)
+
+            if validation_data:
+                stats['total_validations'] = len(validation_data)
+                stats['total_articles'] = len(validation_data)
+
+                # Analyze validation levels
+                level_counts = {}
+                quality_scores = []
+
+                for entry in validation_data:
+                    level = entry.get('validation_level', 'unknown')
+                    level_counts[level] = level_counts.get(level, 0) + 1
+
+                    if entry.get('quality_score'):
+                        quality_scores.append(entry['quality_score'])
+
+                    if entry.get('passed_validation', False):
+                        stats['total_valid_articles'] += 1
+
+                stats['validation_breakdown'] = level_counts
+                stats['average_quality_score'] = sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
+                stats['validation_history'] = validation_data[-10:]  # Last 10
+
+                # Quality trends over time
+                for entry in validation_data[-20:]:  # Last 20 for trends
+                    if entry.get('quality_score') is not None:
+                        stats['quality_trends'].append({
+                            'timestamp': entry.get('timestamp'),
+                            'quality_score': entry.get('quality_score')
+                        })
+
+        return stats if stats['total_validations'] > 0 else None
+
+    except Exception as e:
+        logger.warning(f"Could not load validation stats: {e}")
+        return None
+
+
+# Data Quality Report Endpoint
+@app.get("/validation/quality-report")
+async def get_validation_quality_report():
+    """Get detailed validation quality report"""
+    try:
+        stats = get_validation_stats()
+
+        if not stats:
+            return {
+                'error': 'No validation data available',
+                'message': 'No validation statistics available yet'
+            }
+
+        # Generate quality assessment
+        avg_quality = stats.get('average_quality_score', 0)
+        validation_breakdown = stats.get('validation_breakdown', {})
+
+        quality_level = 'poor'
+        if avg_quality > 0.8:
+            quality_level = 'excellent'
+        elif avg_quality > 0.6:
+            quality_level = 'good'
+        elif avg_quality > 0.4:
+            quality_level = 'fair'
+
+        # Generate recommendations
+        recommendations = []
+        invalid_count = validation_breakdown.get('INVALID', 0)
+        total = stats.get('total_validations', 1)
+
+        if invalid_count / total > 0.1:
+            recommendations.append("High rate of invalid inputs detected - consider input preprocessing")
+
+        if avg_quality < 0.5:
+            recommendations.append("Low average quality scores - review data sources")
+
+        return {
+            'overall_statistics': {
+                'total_articles': stats.get('total_articles', 0),
+                'overall_success_rate': stats.get('total_valid_articles', 0) / max(stats.get('total_articles', 1), 1)
+            },
+            'quality_assessment': {
+                'quality_level': quality_level,
+                'average_quality_score': avg_quality
+            },
+            'validation_breakdown': validation_breakdown,
+            'recommendations': recommendations,
+            'timestamp': datetime.now().isoformat()
+        }
+
+    except Exception as e:
+        logger.error(f"Quality report generation failed: {e}")
+        raise HTTPException(status_code=500, detail="Failed to generate quality report")
+
 
+# Statistics Validation Endpoint
 @app.get("/validation/statistics")
 async def get_validation_statistics():
     """Get comprehensive validation statistics"""
```
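One subtlety in the hunk above: `if entry.get('quality_score'):` treats a legitimate score of `0.0` as missing, because `0.0` is falsy in Python, so the worst entries silently drop out of `average_quality_score` — while the trends loop just below correctly tests `is not None`. A one-line fix mirroring the trends loop, suggested here rather than part of the commit:

```python
# Count 0.0 scores in the average too; skip only entries with no score at all.
if entry.get('quality_score') is not None:
    quality_scores.append(entry['quality_score'])
```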
```diff
@@ -1454,18 +1556,56 @@ async def get_validation_statistics():
 # Adding fallback to build quality report from metadata if generate_quality_report fails; improved error handling, logging, and richer report structure
 @app.get("/validation/quality-report")
 async def get_quality_report():
-    """Get comprehensive data quality report"""
+    """Get comprehensive data quality report with real validation data"""
     try:
-        #
-        [2 deleted lines not rendered in this view]
+        # Try to get real validation statistics
+        validation_stats = get_validation_stats()
+
+        if validation_stats and validation_stats.get('total_validations', 0) > 0:
+            # Generate report from real validation data
+            avg_quality = validation_stats.get('average_quality_score', 0.0)
+            breakdown = validation_stats.get('validation_breakdown', {})
+            total_validations = validation_stats.get('total_validations', 0)
 
-        [4 deleted lines not rendered in this view]
+            # Assess quality level
+            if avg_quality > 0.8:
+                quality_level = "excellent"
+            elif avg_quality > 0.6:
+                quality_level = "good"
+            elif avg_quality > 0.4:
+                quality_level = "fair"
+            else:
+                quality_level = "poor"
+
+            # Generate recommendations
+            recommendations = []
+            invalid_rate = breakdown.get('INVALID', 0) / max(total_validations, 1)
+
+            if invalid_rate > 0.1:
+                recommendations.append("High rate of invalid inputs - consider input preprocessing")
+            if avg_quality < 0.5:
+                recommendations.append("Low average quality scores - review data sources")
+            if breakdown.get('LOW', 0) / max(total_validations, 1) > 0.2:
+                recommendations.append("Many low-quality inputs detected - implement content filtering")
+
+            return {
+                "report_timestamp": datetime.now().isoformat(),
+                "data_source": "real_validation_logs",
+                "overall_statistics": {
+                    "total_articles": validation_stats.get('total_articles', 0),
+                    "total_validations": total_validations,
+                    "overall_success_rate": validation_stats.get('total_valid_articles', 0) / max(validation_stats.get('total_articles', 1), 1)
+                },
+                "quality_assessment": {
+                    "quality_level": quality_level,
+                    "average_quality_score": avg_quality
+                },
+                "validation_breakdown": breakdown,
+                "recommendations": recommendations,
+                "quality_trends": validation_stats.get('quality_trends', [])
+            }
 
-        # Fallback
+        # Fallback to existing metadata-based approach
         metadata_path = path_manager.get_metadata_path()
 
         if not metadata_path.exists():
```
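Note that after this commit the file registers `@app.get("/validation/quality-report")` twice: once for `get_validation_quality_report` (added just before `/validation/statistics`) and again here for `get_quality_report`. Starlette dispatches to routes in registration order, so the first-registered handler serves every request to this path and the metadata-fallback handler below is effectively shadowed. A quick startup check to surface duplicates like this — assuming the FastAPI instance is named `app` and `logger` is the module logger, as elsewhere in this file:

```python
# Sketch: warn about duplicate route registrations at startup.
# Assumes the FastAPI instance `app` and module-level `logger` from this file.
from collections import Counter

route_counts = Counter(
    (route.path, method)
    for route in app.routes
    if hasattr(route, "methods")  # skip mounts and static routes
    for method in route.methods
)
for (path, method), count in route_counts.items():
    if count > 1:
        logger.warning(f"Duplicate route: {method} {path} registered {count} times")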
```diff
@@ -1477,9 +1617,10 @@ async def get_quality_report():
         with open(metadata_path, 'r') as f:
             metadata = json.load(f)
 
-        # Create quality report from metadata
+        # Create quality report from metadata (existing code)
         quality_report = {
             "report_timestamp": datetime.now().isoformat(),
+            "data_source": "model_metadata",
             "overall_statistics": {
                 "total_articles": (metadata.get('train_size', 0) + metadata.get('test_size', 0)),
                 "overall_success_rate": 0.85 if metadata.get('test_f1', 0) > 0.7 else 0.65
```
```diff
@@ -1509,16 +1650,6 @@ async def get_quality_report():
 
     except HTTPException:
         raise
-    except FileNotFoundError:
-        raise HTTPException(
-            status_code=404,
-            detail="No validation statistics available"
-        )
-    except json.JSONDecodeError:
-        raise HTTPException(
-            status_code=500,
-            detail="Invalid metadata format"
-        )
     except Exception as e:
         logger.error(f"Failed to generate quality report: {e}")
         raise HTTPException(
```
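A minimal smoke test for the new validation flow, assuming `app/fastapi_server.py` exposes the FastAPI instance as `app`, that the single-prediction route is mounted at `/predict`, and that the request model carries a `text` field (as `request.text` in the diff suggests) — all inferred from this commit alone:

```python
# Sketch: end-to-end check of the validation path added in this commit.
# Assumes `app` is exported and the prediction route/payload shape shown below.
from fastapi.testclient import TestClient

from app.fastapi_server import app

client = TestClient(app)

# Whitespace-only input should be rejected with 400 if the DataValidator
# classifies it as INVALID; if validation is unavailable, it proceeds as 200.
resp = client.post("/predict", json={"text": "   "})
assert resp.status_code in (200, 400)

# The quality report answers whether or not validation_log.json has entries,
# falling back to model metadata when it does not.
resp = client.get("/validation/quality-report")
assert resp.status_code == 200
print(resp.json().get("data_source", resp.json()))
```

Because of the duplicate registration noted above, this request is served by `get_validation_quality_report`, whose response omits the `data_source` field; hence the hedged `.get()` in the final line.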