Spaces:

Ahmedik95316
/

Fake-News-Detection-with-MLOps

Running

App Files Community

Ahmedik95316 committed on Aug 21

Commit

cecd6fa

1 Parent(s): 92a45c5

Update app/streamlit_app.py

Browse files

Adding Data Validation Schemas

Files changed (1) hide show

app/streamlit_app.py +77 -191

app/streamlit_app.py CHANGED Viewed

@@ -199,6 +199,17 @@ class StreamlitAppManager:
             logger.warning(f"Could not fetch validation health: {e}")
             return None
 # Initialize app manager
 app_manager = StreamlitAppManager()
@@ -539,6 +550,63 @@ def render_cv_results_section():
         error_msg = cv_results.get('error', 'Unknown error') if cv_results else 'No CV results available'
         st.warning(f"Cross-validation results not available: {error_msg}")
 def render_model_comparison_section():
     """Render model comparison results section"""
     st.subheader("⚖️ Model Comparison Results")
@@ -1431,12 +1499,10 @@ def main():
     with tab5:
         show_logs_section()
     # Tab 6: System Status
     with tab6:
         render_system_status()
 def render_system_status():
     """Render system status tab"""
     st.header("System Status & Monitoring")
@@ -1460,7 +1526,7 @@ def render_system_status():
     with col1:
         st.write(f"**Environment:** {env_info['environment']}")
         st.write(f"**Base Directory:** {env_info['base_dir']}")
-        st.write(f"**Working Directory:** {env_info['current_working_directory']}")
     with col2:
         st.write(f"**Data Directory:** {env_info['data_dir']}")
@@ -1484,13 +1550,11 @@ def render_system_status():
                 else:
                     st.error("🔴 System Status: Unhealthy")
-                # Detailed health metrics
                 col1, col2, col3 = st.columns(3)
                 with col1:
                     st.subheader("🤖 Model Health")
                     model_health = health_data.get('model_health', {})
                     for key, value in model_health.items():
                         if key not in ['test_prediction', 'model_path', 'data_path', 'environment']:
                             display_key = key.replace('_', ' ').title()
@@ -1500,57 +1564,22 @@ def render_system_status():
                             else:
                                 st.write(f"**{display_key}:** {value}")
-                with col2:
-                    st.subheader("💻 System Resources")
-                    system_health = health_data.get('system_health', {})
-                    for key, value in system_health.items():
-                        if isinstance(value, (int, float)):
-                            st.metric(key.replace('_', ' ').title(),
-                                      f"{value:.1f}%")
-                with col3:
-                    st.subheader("🔗 API Health")
-                    api_health = health_data.get('api_health', {})
-                    for key, value in api_health.items():
-                        st.write(
-                            f"**{key.replace('_', ' ').title()}:** {value}")
-                # Environment details from API
-                env_data = health_data.get('environment_info', {})
-                if env_data:
-                    st.subheader("📊 File Availability")
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.write("**Datasets:**")
-                        datasets = env_data.get('available_datasets', {})
-                        for name, exists in datasets.items():
-                            status = "✅" if exists else "❌"
-                            st.write(f"{status} {name}")
-                    with col2:
-                        st.write("**Models:**")
-                        models = env_data.get('available_models', {})
-                        for name, exists in models.items():
-                            status = "✅" if exists else "❌"
-                            st.write(f"{status} {name}")
         except Exception as e:
             st.error(f"Failed to get health status: {e}")
     else:
         st.error("🔴 API Service is not available")
     # Model information
     st.subheader("🎯 Model Information")
     metadata = load_json_file(path_manager.get_metadata_path(), {})
     if metadata:
         col1, col2 = st.columns(2)
         with col1:
             for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
                 if key in metadata:
@@ -1560,7 +1589,6 @@ def render_system_status():
                         st.metric(display_key, f"{value:.4f}")
                     else:
                         st.metric(display_key, str(value))
         with col2:
             for key in ['train_size', 'timestamp', 'environment']:
                 if key in metadata:
@@ -1568,114 +1596,14 @@ def render_system_status():
                     value = metadata[key]
                     if key == 'timestamp':
                         try:
-                            dt = datetime.fromisoformat(
-                                value.replace('Z', '+00:00'))
                             value = dt.strftime('%Y-%m-%d %H:%M:%S')
                         except:
                             pass
                     st.write(f"**{display_key}:** {value}")
     else:
         st.warning("No model metadata available")
-    st.divider()
-    show_validation_status()
-    # Recent activity
-    st.subheader("📜 Recent Activity")
-    activity_log = load_json_file(path_manager.get_activity_log_path(), [])
-    if activity_log:
-        recent_activities = activity_log[-10:] if len(
-            activity_log) > 10 else activity_log
-        for entry in reversed(recent_activities):
-            timestamp = entry.get('timestamp', 'Unknown')
-            event = entry.get('event', 'Unknown event')
-            level = entry.get('level', 'INFO')
-            if level == 'ERROR':
-                st.error(f"🔴 {timestamp} - {event}")
-            elif level == 'WARNING':
-                st.warning(f"🟡 {timestamp} - {event}")
-            else:
-                st.info(f"🔵 {timestamp} - {event}")
-    else:
-        st.info("No recent activity logs found")
-    # File system status
-    st.subheader("📁 File System Status")
-    critical_files = [
-        (path_manager.get_model_file_path(), "Main Model"),
-        (path_manager.get_vectorizer_path(), "Vectorizer"),
-        (path_manager.get_combined_dataset_path(), "Training Dataset"),
-        (path_manager.get_metadata_path(), "Model Metadata")
-    ]
-    col1, col2 = st.columns(2)
-    with col1:
-        st.write("**Critical Files:**")
-        for file_path, description in critical_files:
-            if file_path.exists():
-                st.success(f"✅ {description}")
-            else:
-                st.error(f"❌ {description}")
-    with col2:
-        # Disk usage information
-        try:
-            import shutil
-            # Check disk usage for the base directory
-            base_path = path_manager.base_paths['base']
-            total, used, free = shutil.disk_usage(base_path)
-            st.write("**Disk Usage:**")
-            st.write(f"Total: {total // (1024**3)} GB")
-            st.write(f"Used: {used // (1024**3)} GB")
-            st.write(f"Free: {free // (1024**3)} GB")
-            usage_percent = (used / total) * 100
-            if usage_percent > 90:
-                st.error(f"⚠️ Disk usage: {usage_percent:.1f}%")
-            elif usage_percent > 75:
-                st.warning(f"⚠️ Disk usage: {usage_percent:.1f}%")
-            else:
-                st.success(f"✅ Disk usage: {usage_percent:.1f}%")
-        except Exception as e:
-            st.error(f"Cannot check disk usage: {e}")
-    # Initialize system button
-    if st.button("🔧 Initialize System", help="Run system initialization if components are missing"):
-        with st.spinner("Running system initialization..."):
-            try:
-                result = subprocess.run(
-                    [sys.executable, str(path_manager.base_paths['base'] / "initialize_system.py")],
-                    capture_output=True,
-                    text=True,
-                    timeout=300,
-                    cwd=str(path_manager.base_paths['base'])
-                )
-                if result.returncode == 0:
-                    st.success(
-                        "✅ System initialization completed successfully!")
-                    st.code(result.stdout)
-                    time.sleep(2)
-                    st.rerun()
-                else:
-                    st.error("❌ System initialization failed")
-                    st.code(result.stderr)
-            except subprocess.TimeoutExpired:
-                st.error("⏰ Initialization timed out")
-            except Exception as e:
-                st.error(f"❌ Initialization error: {e}")
 # Auto-refresh logic
 if st.session_state.auto_refresh:
@@ -1683,48 +1611,6 @@ if st.session_state.auto_refresh:
     if time_since_refresh > timedelta(seconds=app_manager.config['refresh_interval']):
         st.session_state.last_refresh = datetime.now()
         st.rerun()
-def show_validation_status():
-    """Display validation system status"""
-    st.subheader("Data Validation Status")
-    validation_health = app_manager.get_validation_health_from_api()
-    validation_stats = app_manager.get_validation_statistics_from_api()
-    if validation_health:
-        health_data = validation_health.get('validation_health', {})
-        overall_status = health_data.get('overall_status', 'unknown')
-        if overall_status == 'healthy':
-            st.success("Validation System: Healthy")
-        elif overall_status == 'degraded':
-            st.warning("Validation System: Degraded")
-        else:
-            st.error("Validation System: Unhealthy")
-    if validation_stats and validation_stats.get('statistics_available'):
-        overall_metrics = validation_stats.get('overall_metrics', {})
-        col1, col2, col3, col4 = st.columns(4)
-        with col1:
-            st.metric("Total Validations", overall_metrics.get('total_validations', 0))
-        with col2:
-            st.metric("Articles Processed", overall_metrics.get('total_articles_processed', 0))
-        with col3:
-            success_rate = overall_metrics.get('overall_success_rate', 0)
-            st.metric("Success Rate", f"{success_rate:.1%}")
-        with col4:
-            quality_score = overall_metrics.get('average_quality_score', 0)
-            st.metric("Quality Score", f"{quality_score:.3f}")
-    else:
-        st.info("No validation statistics available yet")
 # Run main application
 if __name__ == "__main__":

             logger.warning(f"Could not fetch validation health: {e}")
             return None
+    def get_validation_quality_report_from_api(self):
+        """Get validation quality report from API"""
+        try:
+            if not self.api_available:
+                return None
+            response = self.session.get(f"{self.config['api_url']}/validation/quality-report", timeout=10)
+            return response.json() if response.status_code == 200 else None
+        except Exception as e:
+            logger.warning(f"Could not fetch quality report: {e}")
+            return None
 # Initialize app manager
 app_manager = StreamlitAppManager()
         error_msg = cv_results.get('error', 'Unknown error') if cv_results else 'No CV results available'
         st.warning(f"Cross-validation results not available: {error_msg}")
+def render_validation_statistics_section():
+    """Render validation statistics section"""
+    st.subheader("📊 Data Validation Statistics")
+    validation_stats = app_manager.get_validation_statistics_from_api()
+    if validation_stats and validation_stats.get('statistics_available'):
+        overall_metrics = validation_stats.get('overall_metrics', {})
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Total Validations", overall_metrics.get('total_validations', 0))
+        with col2:
+            st.metric("Articles Processed", overall_metrics.get('total_articles_processed', 0))
+        with col3:
+            success_rate = overall_metrics.get('overall_success_rate', 0)
+            st.metric("Success Rate", f"{success_rate:.1%}")
+        with col4:
+            quality_score = overall_metrics.get('average_quality_score', 0)
+            st.metric("Avg Quality", f"{quality_score:.3f}")
+    else:
+        st.info("No validation statistics available yet.")
+def render_validation_quality_report():
+    """Render validation quality report section"""
+    st.subheader("📋 Data Quality Report")
+    quality_report = app_manager.get_validation_quality_report_from_api()
+    if quality_report and 'error' not in quality_report:
+        overall_stats = quality_report.get('overall_statistics', {})
+        quality_assessment = quality_report.get('quality_assessment', {})
+        col1, col2 = st.columns(2)
+        with col1:
+            st.metric("Total Articles", overall_stats.get('total_articles', 0))
+            st.metric("Success Rate", f"{overall_stats.get('overall_success_rate', 0):.1%}")
+        with col2:
+            quality_level = quality_assessment.get('quality_level', 'unknown')
+            if quality_level == 'excellent':
+                st.success(f"Quality Level: {quality_level.title()}")
+            elif quality_level == 'good':
+                st.info(f"Quality Level: {quality_level.title()}")
+            elif quality_level == 'fair':
+                st.warning(f"Quality Level: {quality_level.title()}")
+            else:
+                st.error(f"Quality Level: {quality_level.title()}")
+        recommendations = quality_report.get('recommendations', [])
+        if recommendations:
+            st.subheader("💡 Recommendations")
+            for i, rec in enumerate(recommendations, 1):
+                st.write(f"{i}. {rec}")
+    else:
+        st.info("Quality report not available yet.")
 def render_model_comparison_section():
     """Render model comparison results section"""
     st.subheader("⚖️ Model Comparison Results")
     with tab5:
         show_logs_section()
     # Tab 6: System Status
     with tab6:
         render_system_status()
 def render_system_status():
     """Render system status tab"""
     st.header("System Status & Monitoring")
     with col1:
         st.write(f"**Environment:** {env_info['environment']}")
         st.write(f"**Base Directory:** {env_info['base_dir']}")
+        st.write(f"**Working Directory:** {env_info.get('current_working_directory', 'N/A')}")
     with col2:
         st.write(f"**Data Directory:** {env_info['data_dir']}")
                 else:
                     st.error("🔴 System Status: Unhealthy")
+                # Basic health display
                 col1, col2, col3 = st.columns(3)
                 with col1:
                     st.subheader("🤖 Model Health")
                     model_health = health_data.get('model_health', {})
                     for key, value in model_health.items():
                         if key not in ['test_prediction', 'model_path', 'data_path', 'environment']:
                             display_key = key.replace('_', ' ').title()
                             else:
                                 st.write(f"**{display_key}:** {value}")
         except Exception as e:
             st.error(f"Failed to get health status: {e}")
     else:
         st.error("🔴 API Service is not available")
+    # Validation sections: summary statistics first, then the data quality report
+    st.divider()
+    render_validation_statistics_section()
+    st.divider()
+    render_validation_quality_report()
     # Model information
     st.subheader("🎯 Model Information")
     metadata = load_json_file(path_manager.get_metadata_path(), {})
     if metadata:
         col1, col2 = st.columns(2)
         with col1:
             for key in ['model_version', 'test_accuracy', 'test_f1', 'model_type']:
                 if key in metadata:
                         st.metric(display_key, f"{value:.4f}")
                     else:
                         st.metric(display_key, str(value))
         with col2:
             for key in ['train_size', 'timestamp', 'environment']:
                 if key in metadata:
                     value = metadata[key]
                     if key == 'timestamp':
                         try:
+                            dt = datetime.fromisoformat(value.replace('Z', '+00:00'))
                             value = dt.strftime('%Y-%m-%d %H:%M:%S')
                         except:
                             pass
                     st.write(f"**{display_key}:** {value}")
     else:
         st.warning("No model metadata available")
 # Auto-refresh logic
 if st.session_state.auto_refresh:
     if time_since_refresh > timedelta(seconds=app_manager.config['refresh_interval']):
         st.session_state.last_refresh = datetime.now()
         st.rerun()
 # Run main application
 if __name__ == "__main__":