Commit · 3f56aad
1 Parent(s): 4798f78
Create tests/test_retrain.py

Adding Tests for MLOps Infrastructure Enhancement

- tests/test_retrain.py +645 -0
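
To run this suite locally, something along the lines of the pytest.main invocation at the bottom of the file should work from the project root:

    pytest tests/test_retrain.py -v --tb=short
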
tests/test_retrain.py ADDED
@@ -0,0 +1,645 @@
| 1 |
+
# tests/test_retrain.py
|
| 2 |
+
# Comprehensive test suite for enhanced retraining pipeline with LightGBM + ensemble
|
| 3 |
+
|
| 4 |
+
import pytest
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import joblib
|
| 8 |
+
import tempfile
|
| 9 |
+
import json
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from unittest.mock import Mock, patch, MagicMock
|
| 12 |
+
import sys
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
# Add project root to path for imports
|
| 16 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
| 17 |
+
|
| 18 |
+
from model.retrain import (
|
| 19 |
+
EnhancedModelRetrainer, CVModelComparator, EnsembleManager,
|
| 20 |
+
preprocess_text_function, AutomatedRetrainingManager
|
| 21 |
+
)
|
| 22 |
+
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
|
| 23 |
+
from sklearn.linear_model import LogisticRegression
|
| 24 |
+
from sklearn.pipeline import Pipeline
|
| 25 |
+
from sklearn.model_selection import cross_val_score
|
| 26 |
+
import lightgbm as lgb
|
| 27 |
+
|
| 28 |
+
|
class TestPreprocessing:
    """Test preprocessing and data handling functionality"""

    def test_preprocess_text_function_basic(self):
        """Test basic text preprocessing functionality"""
        texts = [
            "Check out this link: https://example.com and email me@test.com",
            "Multiple!!! question marks??? and dots...",
            "Mixed123 characters456 and symbols@#$",
            ""
        ]

        processed = preprocess_text_function(texts)

        # Should remove URLs and emails
        assert "https://example.com" not in processed[0]
        assert "me@test.com" not in processed[0]

        # Should normalize punctuation
        assert "!!!" not in processed[1]
        assert "???" not in processed[1]

        # Should remove non-alphabetic chars except basic punctuation
        assert "123" not in processed[2]
        assert "@#$" not in processed[2]

        # Should handle empty strings
        assert processed[3] == ""

    def test_preprocess_text_function_edge_cases(self):
        """Test preprocessing with edge cases"""
        edge_cases = [None, 123, [], {"text": "test"}]

        # Should convert all inputs to strings without crashing
        processed = preprocess_text_function(edge_cases)
        assert len(processed) == 4
        for result in processed:
            assert isinstance(result, str)

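# The assertions above pin down the expected behavior of preprocess_text_function
# without showing its body. For reference, a minimal sketch that would satisfy
# them looks like this (hypothetical; the real implementation lives in
# model/retrain.py and may differ):
def _reference_preprocess_sketch(texts):
    """Illustrative stand-in for preprocess_text_function (not used by the tests)."""
    import re
    cleaned = []
    for t in texts:
        t = str(t).lower()                        # coerce any input to str
        t = re.sub(r'http\S+|www\.\S+', ' ', t)   # strip URLs
        t = re.sub(r'\S+@\S+', ' ', t)            # strip email addresses
        t = re.sub(r'([!?.])\1+', r'\1', t)       # collapse repeated punctuation
        t = re.sub(r'[^a-z\s.,!?]', ' ', t)       # drop digits and symbols
        cleaned.append(re.sub(r'\s+', ' ', t).strip())
    return cleaned
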
class TestCVModelComparator:
    """Test cross-validation and model comparison functionality"""

    @pytest.fixture
    def sample_data(self):
        """Create sample training data"""
        np.random.seed(42)
        X = np.random.randn(100, 10)
        y = np.random.randint(0, 2, 100)
        return X, y

    @pytest.fixture
    def cv_comparator(self):
        """Create CV comparator instance"""
        return CVModelComparator(cv_folds=3, random_state=42)

    def test_create_cv_strategy(self, cv_comparator, sample_data):
        """Test CV strategy creation with different data sizes"""
        X, y = sample_data

        # Normal case
        cv_strategy = cv_comparator.create_cv_strategy(X, y)
        assert cv_strategy.n_splits <= 3
        assert cv_strategy.n_splits >= 2

        # Small dataset case
        X_small = X[:8]
        y_small = y[:8]
        cv_strategy_small = cv_comparator.create_cv_strategy(X_small, y_small)
        assert cv_strategy_small.n_splits >= 2
        assert cv_strategy_small.n_splits <= len(np.unique(y_small))

    def test_perform_model_cv_evaluation(self, cv_comparator, sample_data):
        """Test CV evaluation of individual models"""
        X, y = sample_data

        # Create simple pipeline for testing
        model = Pipeline([
            ('model', LogisticRegression(random_state=42, max_iter=100))
        ])

        results = cv_comparator.perform_model_cv_evaluation(model, X, y)

        # Should return comprehensive CV results
        assert 'test_scores' in results
        assert 'train_scores' in results
        assert 'fold_results' in results
        assert 'n_splits' in results

        # Should have all metrics
        expected_metrics = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
        for metric in expected_metrics:
            assert metric in results['test_scores']
            assert 'mean' in results['test_scores'][metric]
            assert 'std' in results['test_scores'][metric]
            assert 'scores' in results['test_scores'][metric]

    def test_compare_models_with_cv(self, cv_comparator, sample_data):
        """Test statistical comparison between two models"""
        X, y = sample_data

        # Create two different models
        model1 = Pipeline([('model', LogisticRegression(random_state=42, max_iter=100))])
        model2 = Pipeline([('model', RandomForestClassifier(random_state=42, n_estimators=10))])

        comparison = cv_comparator.compare_models_with_cv(model1, model2, X, y)

        # Should return comprehensive comparison
        assert 'metric_comparisons' in comparison
        assert 'promotion_decision' in comparison
        assert 'feature_engineering_comparison' in comparison

        # Should have statistical tests for each metric
        for metric in ['accuracy', 'f1', 'precision', 'recall']:
            if metric in comparison['metric_comparisons']:
                metric_comp = comparison['metric_comparisons'][metric]
                assert 'improvement' in metric_comp
                assert 'tests' in metric_comp
                if 'paired_ttest' in metric_comp['tests']:
                    assert 'p_value' in metric_comp['tests']['paired_ttest']
                    assert 'significant' in metric_comp['tests']['paired_ttest']

    def test_feature_upgrade_assessment(self, cv_comparator):
        """Test feature engineering upgrade detection"""
        # Mock results with different feature types
        results1 = {'feature_engineering_type': 'standard_tfidf'}
        results2 = {'feature_engineering_type': 'enhanced'}

        upgrade = cv_comparator._assess_feature_upgrade(results1, results2)

        assert upgrade['is_upgrade'] == True
        assert upgrade['upgrade_type'] == 'standard_to_enhanced'
        assert 'upgrade' in upgrade['description'].lower()

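# The per-metric 'tests' structure checked above maps naturally onto a paired
# t-test over per-fold CV scores. A minimal sketch of that computation (an
# assumption about CVModelComparator's internals, shown here for clarity):
def _paired_ttest_sketch(scores_a, scores_b, alpha=0.05):
    """Compare two same-length arrays of per-fold CV scores."""
    from scipy.stats import ttest_rel
    t_stat, p_value = ttest_rel(scores_b, scores_a)
    return {
        'p_value': float(p_value),
        # significant only if model B both differs statistically and scores higher
        'significant': bool(p_value < alpha and np.mean(scores_b) > np.mean(scores_a)),
    }
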
class TestEnsembleManager:
    """Test ensemble creation and validation"""

    @pytest.fixture
    def ensemble_manager(self):
        """Create ensemble manager instance"""
        return EnsembleManager(random_state=42)

    @pytest.fixture
    def individual_models(self, sample_data):
        """Create individual trained models"""
        X, y = sample_data

        models = {
            'logistic_regression': Pipeline([
                ('model', LogisticRegression(random_state=42, max_iter=100))
            ]),
            'random_forest': Pipeline([
                ('model', RandomForestClassifier(random_state=42, n_estimators=10))
            ])
        }

        # Fit models
        for model in models.values():
            model.fit(X, y)

        return models

    @pytest.fixture
    def sample_data(self):
        """Create sample data for ensemble testing"""
        np.random.seed(42)
        X = np.random.randn(100, 5)
        y = np.random.randint(0, 2, 100)
        return X, y

    def test_create_ensemble(self, ensemble_manager, individual_models):
        """Test ensemble creation from individual models"""
        ensemble = ensemble_manager.create_ensemble(individual_models)

        assert isinstance(ensemble, VotingClassifier)
        assert len(ensemble.estimators) == len(individual_models)
        assert ensemble.voting == 'soft'

        # Check estimator names match
        estimator_names = [name for name, _ in ensemble.estimators]
        assert set(estimator_names) == set(individual_models.keys())

    def test_evaluate_ensemble_vs_individuals(self, ensemble_manager, individual_models, sample_data):
        """Test ensemble performance comparison"""
        X, y = sample_data
        X_train, X_test, y_train, y_test = X[:80], X[80:], y[:80], y[80:]

        # Create and fit ensemble
        ensemble = ensemble_manager.create_ensemble(individual_models)
        ensemble.fit(X_train, y_train)

        # Evaluate
        results = ensemble_manager.evaluate_ensemble_vs_individuals(
            ensemble, individual_models, X_test, y_test
        )

        # Should have results for all models plus ensemble
        expected_keys = set(individual_models.keys()) | {'ensemble', 'ensemble_analysis'}
        assert set(results.keys()) == expected_keys

        # Should have all metrics for each model
        for model_name in individual_models.keys():
            assert 'accuracy' in results[model_name]
            assert 'f1' in results[model_name]
            assert 'precision' in results[model_name]
            assert 'recall' in results[model_name]
            assert 'roc_auc' in results[model_name]

        # Should have ensemble analysis
        assert 'best_individual_f1' in results['ensemble_analysis']
        assert 'ensemble_f1' in results['ensemble_analysis']
        assert 'improvement' in results['ensemble_analysis']

    def test_statistical_ensemble_comparison(self, ensemble_manager, individual_models, sample_data):
        """Test statistical comparison for ensemble recommendation"""
        X, y = sample_data
        cv_manager = CVModelComparator(cv_folds=3, random_state=42)

        ensemble = ensemble_manager.create_ensemble(individual_models)

        results = ensemble_manager.statistical_ensemble_comparison(
            ensemble, individual_models, X, y, cv_manager
        )

        # Should have comprehensive statistical comparison
        assert 'ensemble_recommendation' in results
        assert 'statistical_comparisons' in results

        recommendation = results['ensemble_recommendation']
        assert 'use_ensemble' in recommendation
        assert 'confidence' in recommendation
        assert 'significantly_better_than' in recommendation

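# For context, the soft-voting ensemble exercised above can be built directly
# with scikit-learn. A sketch consistent with the assertions in
# test_create_ensemble (whether EnsembleManager adds anything beyond this is
# an assumption):
def _build_soft_voting_sketch(individual_models):
    """Assemble a soft-voting ensemble from a dict of named pipelines."""
    from sklearn.base import clone
    return VotingClassifier(
        estimators=[(name, clone(model)) for name, model in individual_models.items()],
        voting='soft',  # average predicted probabilities across members
    )
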
class TestEnhancedModelRetrainer:
    """Test main retraining functionality"""

    @pytest.fixture
    def temp_dir(self):
        """Create temporary directory for testing"""
        with tempfile.TemporaryDirectory() as temp_dir:
            yield Path(temp_dir)

    @pytest.fixture
    def retrainer(self, temp_dir):
        """Create retrainer instance with temporary paths"""
        retrainer = EnhancedModelRetrainer()

        # Override paths to use temp directory
        retrainer.base_dir = temp_dir
        retrainer.data_dir = temp_dir / "data"
        retrainer.model_dir = temp_dir / "model"
        retrainer.logs_dir = temp_dir / "logs"
        retrainer.backup_dir = temp_dir / "backups"
        retrainer.features_dir = temp_dir / "features"

        # Recreate paths
        for dir_path in [retrainer.data_dir, retrainer.model_dir, retrainer.logs_dir,
                         retrainer.backup_dir, retrainer.features_dir]:
            dir_path.mkdir(parents=True, exist_ok=True)

        # Update file paths
        retrainer.combined_data_path = retrainer.data_dir / "combined_dataset.csv"
        retrainer.metadata_path = temp_dir / "metadata.json"
        retrainer.prod_pipeline_path = retrainer.model_dir / "pipeline.pkl"

        return retrainer

    @pytest.fixture
    def sample_dataset(self, temp_dir):
        """Create sample dataset for testing"""
        data = {
            'text': [
                'This is a real news article about politics and government.',
                'Fake news alert: celebrities do crazy things for attention.',
                'Scientific breakthrough in renewable energy technology announced.',
                'Conspiracy theory about secret government mind control programs.',
                'Local weather update: sunny skies expected this weekend.',
                'Breaking: major financial market crash predicted by experts.'
            ] * 20,  # Repeat to get enough samples
            'label': [0, 1, 0, 1, 0, 1] * 20  # 0=real, 1=fake
        }

        df = pd.DataFrame(data)
        dataset_path = temp_dir / "data" / "combined_dataset.csv"
        dataset_path.parent.mkdir(exist_ok=True)
        df.to_csv(dataset_path, index=False)

        return dataset_path, df

    def test_setup_models(self, retrainer):
        """Test model configuration setup"""
        # Should have all three models configured
        expected_models = {'logistic_regression', 'random_forest', 'lightgbm'}
        assert set(retrainer.models.keys()) == expected_models

        # Should have LightGBM properly configured
        lgb_config = retrainer.models['lightgbm']
        assert isinstance(lgb_config['model'], lgb.LGBMClassifier)
        assert lgb_config['model'].n_jobs == 1  # CPU optimization
        assert 'param_grid' in lgb_config

        # All models should have CPU-friendly settings
        for model_config in retrainer.models.values():
            model = model_config['model']
            if hasattr(model, 'n_jobs'):
                assert model.n_jobs == 1

    def test_load_new_data(self, retrainer, sample_dataset):
        """Test data loading and validation"""
        dataset_path, expected_df = sample_dataset

        success, df, message = retrainer.load_new_data()

        assert success == True
        assert df is not None
        assert len(df) == len(expected_df)
        assert 'text' in df.columns
        assert 'label' in df.columns
        assert set(df['label'].unique()) == {0, 1}

    def test_clean_and_validate_data(self, retrainer):
        """Test data cleaning and validation"""
        # Create test data with various issues
        dirty_data = pd.DataFrame({
            'text': [
                'Valid text sample',
                'Short',  # Too short
                '',  # Empty
                None,  # Null
                'Valid longer text sample for testing',
                'x' * 15000,  # Too long
                'Another valid text sample'
            ],
            'label': [0, 1, 0, 2, 1, 1, 0]  # Invalid label (2)
        })

        clean_df = retrainer.clean_and_validate_data(dirty_data)

        # Should filter out problematic rows
        assert len(clean_df) < len(dirty_data)
        assert all(clean_df['text'].str.len() > 10)
        assert all(clean_df['text'].str.len() < 10000)
        assert set(clean_df['label'].unique()).issubset({0, 1})
        assert not clean_df.isnull().any().any()

    def test_create_preprocessing_pipeline_standard(self, retrainer):
        """Test standard TF-IDF pipeline creation"""
        retrainer.use_enhanced_features = False

        pipeline = retrainer.create_preprocessing_pipeline()

        assert isinstance(pipeline, Pipeline)
        step_names = [name for name, _ in pipeline.steps]

        # Should have standard pipeline steps
        expected_steps = ['preprocess', 'vectorize', 'feature_select', 'model']
        assert step_names == expected_steps

        # Model step should be None (set later)
        assert pipeline.named_steps['model'] is None

    @patch('model.retrain.ENHANCED_FEATURES_AVAILABLE', True)
    def test_create_preprocessing_pipeline_enhanced(self, retrainer):
        """Test enhanced feature pipeline creation (mocked)"""
        retrainer.use_enhanced_features = True

        with patch('model.retrain.AdvancedFeatureEngineer') as mock_fe:
            pipeline = retrainer.create_preprocessing_pipeline()

            assert isinstance(pipeline, Pipeline)
            step_names = [name for name, _ in pipeline.steps]

            # Should have enhanced pipeline steps
            expected_steps = ['enhanced_features', 'model']
            assert step_names == expected_steps

            # Should create feature engineer with correct parameters
            mock_fe.assert_called_once()
            call_kwargs = mock_fe.call_args[1]
            assert call_kwargs['feature_selection_k'] == retrainer.feature_selection_k
            assert call_kwargs['tfidf_max_features'] == retrainer.max_features

    def test_hyperparameter_tuning_small_dataset(self, retrainer):
        """Test hyperparameter tuning with very small dataset"""
        # Create minimal dataset that should skip tuning
        X = np.random.randn(15, 5)
        y = np.random.randint(0, 2, 15)

        pipeline = retrainer.create_preprocessing_pipeline()

        best_model, results = retrainer.hyperparameter_tuning_with_cv(
            pipeline, X, y, 'logistic_regression'
        )

        # Should skip tuning and use default parameters
        assert 'note' in results
        assert 'skipped' in results['note'].lower()
        assert results['best_params'] == 'default_parameters'
        assert best_model is not None

    def test_detect_production_feature_type(self, retrainer, temp_dir):
        """Test production model feature type detection"""
        # Test with no existing model
        feature_type = retrainer.detect_production_feature_type()
        assert feature_type in ['standard_tfidf', 'unknown']

        # Test with metadata indicating enhanced features
        metadata = {
            'feature_engineering': {
                'type': 'enhanced'
            }
        }
        with open(retrainer.metadata_path, 'w') as f:
            json.dump(metadata, f)

        feature_type = retrainer.detect_production_feature_type()
        assert feature_type == 'enhanced'

    def test_error_handling_invalid_data(self, retrainer, temp_dir):
        """Test error handling with invalid data scenarios"""
        # Test with no data files
        success, df, message = retrainer.load_new_data()
        assert success == False
        assert 'No data files found' in message

        # Test with empty dataset
        empty_df = pd.DataFrame({'text': [], 'label': []})
        empty_path = temp_dir / "data" / "combined_dataset.csv"
        empty_path.parent.mkdir(exist_ok=True)
        empty_df.to_csv(empty_path, index=False)

        success, df, message = retrainer.load_new_data()
        assert success == False
        assert 'Insufficient data' in message

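# The step names asserted in test_create_preprocessing_pipeline_standard imply
# a pipeline of roughly this shape. The exact transformers are assumptions;
# only the step names and the deferred 'model' slot are pinned down by the test:
def _standard_pipeline_sketch(max_features=5000, feature_selection_k=2000):
    """Illustrative four-step TF-IDF pipeline matching the asserted step names."""
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.feature_selection import SelectKBest, chi2
    from sklearn.preprocessing import FunctionTransformer
    return Pipeline([
        ('preprocess', FunctionTransformer(preprocess_text_function)),
        ('vectorize', TfidfVectorizer(max_features=max_features)),
        ('feature_select', SelectKBest(chi2, k=feature_selection_k)),
        ('model', None),  # filled in later with each candidate estimator
    ])
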
class TestIntegration:
    """Integration tests for complete retraining workflow"""

    @pytest.fixture
    def complete_setup(self):
        """Set up complete testing environment"""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            # Create retrainer
            retrainer = EnhancedModelRetrainer()
            retrainer.base_dir = temp_path
            retrainer.setup_paths()

            # Create sample data
            data = pd.DataFrame({
                'text': [
                    f'Real news article number {i} with substantial content for testing.'
                    for i in range(30)
                ] + [
                    f'Fake news article number {i} with misleading information and content.'
                    for i in range(30)
                ],
                'label': [0] * 30 + [1] * 30
            })

            data.to_csv(retrainer.combined_data_path, index=False)

            # Create mock production model; Mock objects are not picklable, so
            # use 'passthrough' for the vectorize step so joblib.dump succeeds
            mock_model = Pipeline([
                ('vectorize', 'passthrough'),
                ('model', LogisticRegression(random_state=42))
            ])
            joblib.dump(mock_model, retrainer.prod_pipeline_path)

            yield retrainer, data

    def test_end_to_end_retraining_workflow(self, complete_setup):
        """Test complete retraining workflow"""
        retrainer, data = complete_setup

        # Disable ensemble for faster testing
        retrainer.enable_ensemble = False
        retrainer.use_enhanced_features = False

        # Should complete without errors
        success, message = retrainer.retrain_model()

        # Should either promote or keep current model
        assert success == True
        assert 'enhanced' in message.lower() or 'keeping' in message.lower() or 'promoted' in message.lower()

        # Should create proper logs
        assert retrainer.retraining_log_path.exists()

    @patch('model.retrain.ENHANCED_FEATURES_AVAILABLE', True)
    def test_ensemble_selection_workflow(self, complete_setup):
        """Test ensemble selection in complete workflow"""
        retrainer, data = complete_setup

        # Enable ensemble; keep enhanced features off to avoid import issues
        retrainer.enable_ensemble = True
        retrainer.use_enhanced_features = False

        with patch.object(retrainer, 'train_and_evaluate_models') as mock_train:
            # Mock successful training with ensemble selection
            mock_results = {
                'logistic_regression': {
                    'model': Mock(),
                    'tuning_results': {
                        'cross_validation': {
                            'test_scores': {'f1': {'mean': 0.75}}
                        }
                    }
                },
                'random_forest': {
                    'model': Mock(),
                    'tuning_results': {
                        'cross_validation': {
                            'test_scores': {'f1': {'mean': 0.77}}
                        }
                    }
                },
                'lightgbm': {
                    'model': Mock(),
                    'tuning_results': {
                        'cross_validation': {
                            'test_scores': {'f1': {'mean': 0.76}}
                        }
                    }
                },
                'ensemble': {
                    'model': Mock(),
                    'statistical_comparison': {
                        'ensemble_recommendation': {'use_ensemble': True, 'confidence': 0.85}
                    }
                }
            }
            mock_train.return_value = mock_results

            # Test model selection
            best_name, best_model, best_metrics = retrainer.select_best_model(mock_results)

            # Should select ensemble when recommended
            assert best_name == 'ensemble'
            assert best_model == mock_results['ensemble']['model']

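# The selection behavior verified above suggests logic along these lines
# (a sketch of what select_best_model is assumed to do, not a copy of it):
def _select_best_name_sketch(results):
    """Prefer the ensemble when recommended, otherwise the best mean CV f1."""
    ensemble_entry = results.get('ensemble', {})
    rec = ensemble_entry.get('statistical_comparison', {}).get('ensemble_recommendation', {})
    if rec.get('use_ensemble'):
        return 'ensemble'

    def mean_f1(name):
        cv = results[name]['tuning_results']['cross_validation']
        return cv['test_scores']['f1']['mean']

    return max((name for name in results if name != 'ensemble'), key=mean_f1)
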
class TestAutomatedRetrainingManager:
    """Test automated retraining management"""

    @pytest.fixture
    def automation_manager(self):
        """Create automation manager for testing"""
        with tempfile.TemporaryDirectory() as temp_dir:
            manager = AutomatedRetrainingManager(base_dir=Path(temp_dir))
            yield manager

    def test_initialization(self, automation_manager):
        """Test automation manager initialization"""
        assert automation_manager.enhanced_features_available is not None
        assert automation_manager.automation_dir.exists()
        assert hasattr(automation_manager, 'drift_monitor')

    def test_manual_retraining_trigger(self, automation_manager):
        """Test manual retraining trigger functionality"""
        with patch.object(EnhancedModelRetrainer, 'automated_retrain_with_validation') as mock_retrain:
            mock_retrain.return_value = (True, "Retraining completed successfully")

            result = automation_manager.trigger_manual_retraining("test_reason")

            assert result['success'] == True
            assert 'enhanced' in result['message'].lower()
            mock_retrain.assert_called_once()

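# Typical manual-trigger usage implied by the test above (the argument value
# and result keys follow the test; everything else is an assumption):
#
#     manager = AutomatedRetrainingManager(base_dir=Path("."))
#     result = manager.trigger_manual_retraining("drift_detected")
#     if result['success']:
#         print(result['message'])
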
# Performance and Resource Tests
class TestPerformanceConstraints:
    """Test performance under CPU constraints (HuggingFace Spaces)"""

    def test_cpu_optimization_settings(self):
        """Test all models use CPU-friendly settings"""
        retrainer = EnhancedModelRetrainer()

        for model_name, config in retrainer.models.items():
            model = config['model']

            # Check n_jobs setting for models that support it
            if hasattr(model, 'n_jobs'):
                assert model.n_jobs == 1, f"{model_name} should use n_jobs=1 for CPU optimization"

            # Check LightGBM-specific settings
            if isinstance(model, lgb.LGBMClassifier):
                assert model.n_estimators <= 100, "LightGBM should use reasonable n_estimators for CPU"
                assert model.num_leaves <= 31, "LightGBM should use reasonable num_leaves for CPU"
                assert model.verbose == -1, "LightGBM should suppress verbose output"

    def test_memory_efficient_processing(self):
        """Test memory-efficient data processing"""
        retrainer = EnhancedModelRetrainer()

        # Test with reasonably sized dataset
        large_data = pd.DataFrame({
            'text': ['Sample text for testing memory efficiency'] * 1000,
            'label': np.random.randint(0, 2, 1000)
        })

        # Should handle without memory issues
        cleaned_data = retrainer.clean_and_validate_data(large_data)
        assert len(cleaned_data) <= len(large_data)

        # Check feature selection limits
        assert retrainer.feature_selection_k <= retrainer.max_features
        assert retrainer.max_features <= 7500  # Reasonable limit for CPU constraints

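# A LightGBM configuration consistent with the CPU-constraint assertions above;
# the specific values are assumptions chosen within the asserted bounds:
def _cpu_friendly_lgbm_sketch():
    """LGBMClassifier settings that would satisfy test_cpu_optimization_settings."""
    return lgb.LGBMClassifier(
        n_estimators=100,  # asserted <= 100
        num_leaves=31,     # asserted <= 31
        n_jobs=1,          # single-threaded for shared CPU hosts
        verbose=-1,        # suppress LightGBM logging
        random_state=42,
    )
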
if __name__ == "__main__":
    # Run tests with verbose output
    pytest.main([__file__, "-v", "--tb=short"])