Spaces:

Trainera
/

foodrecognitionapi

Sleeping

App Files Files Community

har1zarD committed on Oct 31

Commit

9ccc31e

1 Parent(s): 213ba68

readme

Browse files

Files changed (3) hide show

Dockerfile +51 -19
app.py +544 -80
requirements.txt +3 -0

Dockerfile CHANGED Viewed

@@ -1,14 +1,22 @@
-# Advanced Food Recognition API - Optimized for HF Spaces
 FROM python:3.11-slim
-# Create user for Hugging Face Spaces
 RUN useradd -m -u 1000 user
 # Set working directory
 WORKDIR /app
-# Install system dependencies for advanced image processing
-RUN apt-get update && apt-get install -y \
     gcc \
     g++ \
     libglib2.0-0 \
@@ -19,30 +27,48 @@ RUN apt-get update && apt-get install -y \
     libgl1-mesa-dev \
     libglib2.0-dev \
     curl \
-    && rm -rf /var/lib/apt/lists/*
-# Copy requirements first (for better caching)
 COPY --chown=user:user requirements.txt .
-# Install NumPy 1.x first to ensure compatibility
-RUN pip install --no-cache-dir "numpy>=1.24.0,<2.0.0"
-# Install remaining Python dependencies as root
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy application code with correct ownership
-COPY --chown=user:user app.py app.py
-# Create cache directory with correct permissions
-RUN mkdir -p /home/user/.cache /tmp/transformers /tmp/huggingface /tmp/torch && chown -R user:user /home/user/.cache /tmp/transformers /tmp/huggingface /tmp/torch
-# Switch to non-root user
 USER user
-# Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV PORT=7860
 ENV HOME=/home/user
 ENV HF_HOME=/tmp/huggingface
 ENV TRANSFORMERS_CACHE=/tmp/transformers
 ENV XDG_CACHE_HOME=/tmp
@@ -50,17 +76,23 @@ ENV TORCH_HOME=/tmp/torch
 ENV HF_HUB_DISABLE_TELEMETRY=1
 ENV HF_HUB_ENABLE_HF_TRANSFER=0
 # Performance optimizations
 ENV TOKENIZERS_PARALLELISM=false
 ENV OMP_NUM_THREADS=2
 ENV MKL_NUM_THREADS=2
-# Expose port (7860 for Hugging Face Spaces)
 EXPOSE 7860
-# Health check for container monitoring
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
-# Run the optimized food recognition API
 CMD ["python", "app.py"]

+# ============================================================
+# 🍽️ Trainera Food Recognition API
+# Production-Ready Multilingual AI Food Recognition
+# ============================================================
 FROM python:3.11-slim
+# Metadata
+LABEL maintainer="Trainera Team"
+LABEL description="AI Food Recognition API with OpenAI translations (101+ food categories)"
+LABEL version="2.0.0"
+# Create non-root user for security (HF Spaces requirement)
 RUN useradd -m -u 1000 user
 # Set working directory
 WORKDIR /app
+# Install system dependencies for ML and image processing
+RUN apt-get update && apt-get install -y --no-install-recommends \
     gcc \
     g++ \
     libglib2.0-0 \
     libgl1-mesa-dev \
     libglib2.0-dev \
     curl \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+# Copy requirements first (Docker layer caching optimization)
 COPY --chown=user:user requirements.txt .
+# Install Python dependencies
+# Step 1: Install NumPy 1.x first (transformers compatibility)
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "numpy>=1.24.0,<2.0.0"
+# Step 2: Install remaining dependencies
 RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY --chown=user:user app.py .
+# Create cache directories with correct permissions
+RUN mkdir -p \
+    /home/user/.cache \
+    /tmp/transformers \
+    /tmp/huggingface \
+    /tmp/torch \
+    && chown -R user:user /home/user/.cache /tmp/transformers /tmp/huggingface /tmp/torch
+# Switch to non-root user (security best practice)
 USER user
+# Environment Variables
+# ============================================================
+# Python configuration
 ENV PYTHONUNBUFFERED=1
+ENV PYTHONDONTWRITEBYTECODE=1
+# Port configuration (7860 = HF Spaces standard)
 ENV PORT=7860
+# User home
 ENV HOME=/home/user
+# Hugging Face cache directories
 ENV HF_HOME=/tmp/huggingface
 ENV TRANSFORMERS_CACHE=/tmp/transformers
 ENV XDG_CACHE_HOME=/tmp
 ENV HF_HUB_DISABLE_TELEMETRY=1
 ENV HF_HUB_ENABLE_HF_TRANSFER=0
+# OpenAI API key: intentionally NOT declared with ENV. Supply it at runtime via
+# HF Spaces secrets or `docker run -e OPENAI_API_KEY=...`.
+# `ENV OPENAI_API_KEY=${OPENAI_API_KEY:-}` would be expanded at build time only
+# (no matching ARG exists), baking an empty value into the image; the app already
+# falls back to "" when the variable is unset.
+# USDA API key (optional): public DEMO_KEY default, override at runtime with -e.
+# `${USDA_API_KEY:-DEMO_KEY}` always resolved to the literal below at build time.
+ENV USDA_API_KEY=DEMO_KEY
 # Performance optimizations
 ENV TOKENIZERS_PARALLELISM=false
 ENV OMP_NUM_THREADS=2
 ENV MKL_NUM_THREADS=2
+# Expose port
 EXPOSE 7860
+# Health check (monitors API health every 30s)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
     CMD curl -f http://localhost:7860/health || exit 1
+# Run the application
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -19,13 +19,25 @@ import aiohttp
 import re
 from typing import Dict, Any, List, Optional
 from io import BytesIO
 import torch
 import torch.nn.functional as F
 from PIL import Image, ImageEnhance
 import numpy as np
-from fastapi import FastAPI, File, UploadFile, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 import uvicorn
@@ -33,11 +45,18 @@ import uvicorn
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 from contextlib import asynccontextmanager
 # ==================== CONFIGURATION ====================
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 MAX_IMAGE_SIZE = 512
 ALLOWED_TYPES = ["image/jpeg", "image/jpg", "image/png", "image/webp"]
 # ==================== MULTI-MODEL FOOD RECOGNITION ====================
 FOOD_MODELS = {
     # Primary specialize food models
@@ -86,9 +105,9 @@ PRIMARY_MODEL = "food101"
 COMPREHENSIVE_FOOD_CATEGORIES = {
     # Food-101 categories
     "pizza", "hamburger", "sushi", "ice_cream", "french_fries", "chicken_wings",
-    "chocolate_cake", "caesar_salad", "steak", "tacos", "pancakes", "lasagna",
-    "apple_pie", "chicken_curry", "pad_thai", "ramen", "waffles", "donuts",
-    "cheesecake", "fish_and_chips", "fried_rice", "greek_salad", "guacamole",
     # Balkanska/Srpska tradicionalna jela
     "cevapi", "cevapcici", "burek", "pljeskavica", "sarma", "klepe", "dolma",
@@ -365,10 +384,19 @@ def clean_food_name_for_search(raw_name: str) -> str:
     """Smart cleaning of Food-101 names for better API searches."""
     # Remove underscores and replace with spaces
     cleaned = raw_name.replace("_", " ")
     # Remove common Food-101 artifacts
     cleaned = re.sub(r'\b(and|with|the|a)\b', ' ', cleaned, flags=re.IGNORECASE)
     # Handle specific Food-101 patterns
     replacements = {
         "cup cakes": "cupcakes",
@@ -378,15 +406,16 @@ def clean_food_name_for_search(raw_name: str) -> str:
         "shrimp and grits": "shrimp grits",
         "macaroni and cheese": "mac and cheese"
     }
     for old, new in replacements.items():
         if old in cleaned.lower():
             cleaned = new
             break
-    # Clean whitespace
     cleaned = re.sub(r'\s+', ' ', cleaned).strip()
     return cleaned
 async def search_openfoodfacts_nutrition(food_name: str) -> Optional[Dict[str, Any]]:
@@ -472,9 +501,9 @@ async def get_nutrition_from_apis(food_name: str) -> Dict[str, Any]:
     """Get nutrition data from multiple FREE databases with comprehensive fallback."""
     # Clean the Food-101 name for better searches
     cleaned_name = clean_food_name_for_search(food_name)
     logger.info(f"🔍 Searching nutrition for: '{food_name}' → '{cleaned_name}'")
     # Try APIs in order: Free/Unlimited first, then limited APIs
     nutrition_sources = [
         ("OpenFoodFacts", search_openfoodfacts_nutrition),  # FREE, 2M+ products
@@ -483,23 +512,238 @@ async def get_nutrition_from_apis(food_name: str) -> Dict[str, Any]:
         ("Edamam", search_edamam_nutrition),                # 1000/month limit
         ("Spoonacular", search_spoonacular_nutrition)       # 150/day limit
     ]
     for source_name, search_func in nutrition_sources:
         try:
             nutrition_data = await search_func(cleaned_name)
             if nutrition_data and nutrition_data.get("calories", 0) > 0:
                 nutrition_data["source"] = source_name
                 return nutrition_data
         except Exception as e:
-            logger.warning(f"⚠️ {source_name} search failed: {e}")
             continue
     # All APIs failed, return default values
-    logger.warning(f"🚨 No nutrition data found for '{cleaned_name}', using defaults")
     default_nutrition = DEFAULT_NUTRITION.copy()
     default_nutrition["source"] = "Default (APIs unavailable)"
     return default_nutrition
 # ==================== MULTI-MODEL FOOD RECOGNIZER ====================
 class MultiModelFoodRecognizer:
     """Production-ready multi-model ensemble for comprehensive food recognition."""
@@ -657,84 +901,186 @@ class MultiModelFoodRecognizer:
         """Main predict method - uses ensemble if available, fallback to primary."""
         return self.predict_ensemble(image, top_k)
-    def predict_ensemble(self, image: Image.Image, top_k: int = 5) -> Dict[str, Any]:
-        """Ensemble prediction using all available models."""
         if not self.is_loaded:
             raise RuntimeError("Models not loaded")
         all_predictions = []
         model_results = {}
-        # Get predictions from all models
         for model_key in self.available_models:
-            predictions = self._predict_with_model(image, model_key, top_k)
             if predictions:
                 model_results[model_key] = predictions
                 all_predictions.extend(predictions)
         if not all_predictions:
             raise RuntimeError("No models produced valid predictions")
         # Ensemble voting: weight by model priority and confidence
         food_scores = {}
         for pred in all_predictions:
             model_key = pred["model"]
             priority_weight = 1.0 / FOOD_MODELS[model_key]["priority"]  # Higher priority = lower number = higher weight
             confidence_weight = pred["confidence"]
-            # Combined score
-            combined_score = priority_weight * confidence_weight
             food_name = pred["raw_label"]
             if food_name not in food_scores:
                 food_scores[food_name] = {
                     "total_score": 0,
                     "count": 0,
                     "best_prediction": pred,
-                    "models": []
                 }
             food_scores[food_name]["total_score"] += combined_score
             food_scores[food_name]["count"] += 1
             food_scores[food_name]["models"].append(model_key)
             # Keep the prediction with highest confidence as representative
             if pred["confidence"] > food_scores[food_name]["best_prediction"]["confidence"]:
                 food_scores[food_name]["best_prediction"] = pred
         # Sort by ensemble score
         sorted_foods = sorted(
-            food_scores.items(),
-            key=lambda x: x[1]["total_score"],
             reverse=True
         )
-        # Format final results
         final_predictions = []
-        for food_name, data in sorted_foods[:top_k]:
             pred = data["best_prediction"].copy()
             pred["ensemble_score"] = data["total_score"]
             pred["model_count"] = data["count"]
             pred["contributing_models"] = data["models"]
             final_predictions.append(pred)
-        # Primary result
-        primary = final_predictions[0] if final_predictions else {
             "label": "Unknown Food",
             "raw_label": "unknown",
             "confidence": 0.0,
             "ensemble_score": 0.0,
             "model_count": 0,
-            "contributing_models": []
         }
         return {
             "success": True,
             "label": primary["label"],
             "confidence": primary["confidence"],
             "primary_label": primary["raw_label"],
             "ensemble_score": primary.get("ensemble_score", 0),
-            "alternatives": final_predictions[1:],
             "model_results": model_results,
             "system_info": {
                 "available_models": self.available_models,
@@ -756,8 +1102,9 @@ async def lifespan(app: FastAPI):
     logger.info(f"🖥️  Device: {device.upper()}")
     logger.info(f"📊 Models: {len(recognizer.available_models)} active models")
     logger.info(f"🎯 Total Food Categories: {sum(FOOD_MODELS[m]['classes'] for m in recognizer.available_models)}")
     logger.info("=" * 60)
     yield
     # Shutdown
@@ -780,6 +1127,17 @@ logger.info("=" * 60)
 device = select_device()
 recognizer = MultiModelFoodRecognizer(device)
 # Create FastAPI app
 app = FastAPI(
     title="AI Food Recognition API",
@@ -867,43 +1225,149 @@ def health_check():
     }
 @app.post("/api/nutrition/analyze-food")
-async def analyze_food_nutrition(file: UploadFile = File(...)):
     """
-    Analyze food image for Next.js frontend.
-    Returns nutrition-focused response format.
     """
-    logger.info(f"🍽️ Nutrition analysis request: {file.filename}")
     try:
-        # Validate and process image
-        image = await validate_and_read_image(file)
-        # Step 1: AI Model Prediction
-        results = recognizer.predict(image, top_k=5)
-        # Step 2: API Nutrition Lookup
-        nutrition_data = await get_nutrition_from_apis(results["primary_label"])
-        # Log result
-        confidence_pct = f"{results['confidence']:.1%}"
-        source = nutrition_data.get("source", "Unknown")
-        logger.info(f"✅ Prediction: {results['label']} ({confidence_pct}) | Nutrition: {source}")
-        # Return frontend-expected format
-        return JSONResponse(content={
-            "label": results["label"],
-            "confidence": results["confidence"],
-            "nutrition": {
-                "calories": nutrition_data["calories"],
-                "protein": nutrition_data["protein"],
-                "carbs": nutrition_data["carbs"],
-                "fat": nutrition_data["fat"]
-            },
-            "alternatives": results["alternatives"],
-            "source": f"AI Recognition + {source} Database"
-        })
     except HTTPException:
         raise
     except Exception as e:
@@ -923,8 +1387,8 @@ async def analyze_food_spaces(file: UploadFile = File(...)):
         # Validate and process image
         image = await validate_and_read_image(file)
-        # Step 1: AI Model Prediction
-        results = recognizer.predict(image, top_k=5)
         # Step 2: API Nutrition Lookup
         nutrition_data = await get_nutrition_from_apis(results["primary_label"])

 import re
 from typing import Dict, Any, List, Optional
 from io import BytesIO
+from pathlib import Path
+# Load .env file if exists
+try:
+    from dotenv import load_dotenv
+    env_path = Path(__file__).parent / '.env'
+    load_dotenv(dotenv_path=env_path)
+    logging.info(f"✅ Loaded .env from {env_path}")
+except ImportError:
+    logging.warning("⚠️ python-dotenv not installed, using system environment variables")
+except Exception as e:
+    logging.warning(f"⚠️ Could not load .env: {e}")
 import torch
 import torch.nn.functional as F
 from PIL import Image, ImageEnhance
 import numpy as np
+from fastapi import FastAPI, File, UploadFile, HTTPException, Request, Form
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 import uvicorn
 from transformers import AutoImageProcessor, AutoModelForImageClassification
 from contextlib import asynccontextmanager
+# OpenAI for translations
+from openai import AsyncOpenAI
 # ==================== CONFIGURATION ====================
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
 MAX_IMAGE_SIZE = 512
 ALLOWED_TYPES = ["image/jpeg", "image/jpg", "image/png", "image/webp"]
+# OpenAI Configuration (will be initialized after logger is set up)
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+openai_client = None  # Will be initialized in lifespan startup
 # ==================== MULTI-MODEL FOOD RECOGNITION ====================
 FOOD_MODELS = {
     # Primary specialize food models
 COMPREHENSIVE_FOOD_CATEGORIES = {
     # Food-101 categories
     "pizza", "hamburger", "sushi", "ice_cream", "french_fries", "chicken_wings",
+    "chocolate_cake", "caesar_salad", "steak", "tacos", "pancakes", "pancake", "lasagna",
+    "apple_pie", "chicken_curry", "pad_thai", "ramen", "waffles", "waffle", "donuts",
+    "cheesecake", "fish_and_chips", "fried_rice", "greek_salad", "guacamole", "crepe", "crepes",
     # Balkanska/Srpska tradicionalna jela
     "cevapi", "cevapcici", "burek", "pljeskavica", "sarma", "klepe", "dolma",
     """Smart cleaning of Food-101 names for better API searches."""
     # Remove underscores and replace with spaces
     cleaned = raw_name.replace("_", " ")
+    # Handle comma-separated names - take the first part (usually English name)
+    # Example: "Pineapple, Ananas" → "Pineapple"
+    if "," in cleaned:
+        parts = cleaned.split(",")
+        # Try to detect which part is English (usually the first one)
+        # Keep the part that's more likely to be in nutrition databases
+        cleaned = parts[0].strip()
+        logger.info(f"🧹 Cleaned comma-separated name: '{raw_name}' → '{cleaned}'")
     # Remove common Food-101 artifacts
     cleaned = re.sub(r'\b(and|with|the|a)\b', ' ', cleaned, flags=re.IGNORECASE)
     # Handle specific Food-101 patterns
     replacements = {
         "cup cakes": "cupcakes",
         "shrimp and grits": "shrimp grits",
         "macaroni and cheese": "mac and cheese"
     }
     for old, new in replacements.items():
         if old in cleaned.lower():
             cleaned = new
             break
+    # Clean whitespace and extra punctuation
     cleaned = re.sub(r'\s+', ' ', cleaned).strip()
+    cleaned = re.sub(r'[^\w\s-]', '', cleaned)  # Remove special chars except hyphens
     return cleaned
 async def search_openfoodfacts_nutrition(food_name: str) -> Optional[Dict[str, Any]]:
     """Get nutrition data from multiple FREE databases with comprehensive fallback."""
     # Clean the Food-101 name for better searches
     cleaned_name = clean_food_name_for_search(food_name)
     logger.info(f"🔍 Searching nutrition for: '{food_name}' → '{cleaned_name}'")
     # Try APIs in order: Free/Unlimited first, then limited APIs
     nutrition_sources = [
         ("OpenFoodFacts", search_openfoodfacts_nutrition),  # FREE, 2M+ products
         ("Edamam", search_edamam_nutrition),                # 1000/month limit
         ("Spoonacular", search_spoonacular_nutrition)       # 150/day limit
     ]
+    # First attempt with cleaned name
     for source_name, search_func in nutrition_sources:
         try:
             nutrition_data = await search_func(cleaned_name)
             if nutrition_data and nutrition_data.get("calories", 0) > 0:
                 nutrition_data["source"] = source_name
+                logger.info(f"✅ Found nutrition data from {source_name} for '{cleaned_name}'")
                 return nutrition_data
         except Exception as e:
+            logger.warning(f"⚠️ {source_name} search failed for '{cleaned_name}': {e}")
             continue
+    # If cleaned name failed and it's different from original, try original name too
+    if cleaned_name.lower() != food_name.lower():
+        logger.info(f"🔄 Retrying with original name: '{food_name}'")
+        for source_name, search_func in nutrition_sources:
+            try:
+                nutrition_data = await search_func(food_name)
+                if nutrition_data and nutrition_data.get("calories", 0) > 0:
+                    nutrition_data["source"] = source_name
+                    logger.info(f"✅ Found nutrition data from {source_name} for original '{food_name}'")
+                    return nutrition_data
+            except Exception as e:
+                logger.warning(f"⚠️ {source_name} search failed for original '{food_name}': {e}")
+                continue
+    # Try with just the first word as last resort (e.g., "pineapple juice" → "pineapple")
+    words = cleaned_name.split()
+    if len(words) > 1:
+        first_word = words[0]
+        logger.info(f"🔄 Last resort: trying first word only: '{first_word}'")
+        for source_name, search_func in nutrition_sources:
+            try:
+                nutrition_data = await search_func(first_word)
+                if nutrition_data and nutrition_data.get("calories", 0) > 0:
+                    nutrition_data["source"] = f"{source_name} (matched: {first_word})"
+                    logger.info(f"✅ Found nutrition data from {source_name} for '{first_word}'")
+                    return nutrition_data
+            except Exception as e:
+                logger.warning(f"⚠️ {source_name} search failed for '{first_word}': {e}")
+                continue
     # All APIs failed, return default values
+    logger.warning(f"🚨 No nutrition data found for '{food_name}' after all attempts, using defaults")
     default_nutrition = DEFAULT_NUTRITION.copy()
     default_nutrition["source"] = "Default (APIs unavailable)"
     return default_nutrition
+# ==================== TRANSLATION SYSTEM ====================
+# In-memory translation cache to reduce API calls
+translation_cache: Dict[str, Dict[str, str]] = {}  # {locale: {english: translated}}
+# Language code mapping (i18n locale → full language name)
+LANGUAGE_MAP = {
+    "en": "English",
+    "bs": "Bosnian",
+    "de": "German",
+    "es": "Spanish",
+    "fr": "French",
+    "it": "Italian",
+    "pt": "Portuguese",
+    "ar": "Arabic",
+    "tr": "Turkish",
+    "nl": "Dutch",
+    "ru": "Russian",
+    "zh": "Chinese",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "hi": "Hindi",
+    "sr": "Serbian",
+    "hr": "Croatian",
+    "sq": "Albanian",
+    "mk": "Macedonian",
+}
+async def translate_food_names_batch(food_names: List[str], target_locale: str) -> Dict[str, str]:
+    """
+    Translate multiple food names in a single API call (COST OPTIMIZATION).
+    Args:
+        food_names: List of food names in English
+        target_locale: Target language code
+    Returns:
+        Dictionary mapping original names to translated names
+    """
+    # Skip translation if target is English or no OpenAI client
+    if target_locale == "en" or not openai_client or not OPENAI_API_KEY:
+        return {name: name for name in food_names}
+    # Check cache first
+    if target_locale not in translation_cache:
+        translation_cache[target_locale] = {}
+    translations = {}
+    needs_translation = []
+    # Separate cached and uncached items
+    for name in food_names:
+        if name in translation_cache[target_locale]:
+            translations[name] = translation_cache[target_locale][name]
+            logger.info(f"💾 Cache hit: '{name}' → '{translations[name]}' ({target_locale})")
+        else:
+            needs_translation.append(name)
+    # If all cached, return immediately
+    if not needs_translation:
+        return translations
+    # Get target language name
+    target_language = LANGUAGE_MAP.get(target_locale, target_locale)
+    try:
+        logger.info(f"🌐 Batch translating {len(needs_translation)} items to {target_language}")
+        # Create batch translation prompt (1 API call for multiple items)
+        food_list = "\n".join(f"{i+1}. {name}" for i, name in enumerate(needs_translation))
+        response = await openai_client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {
+                    "role": "system",
+                    "content": f"You are a professional food translator. Translate food names to {target_language}. Return ONLY the translations, one per line, in the same order. Keep it natural and commonly used."
+                },
+                {
+                    "role": "user",
+                    "content": f"Translate these food names to {target_language}:\n{food_list}"
+                }
+            ],
+            max_tokens=150,
+            temperature=0.3,
+        )
+        translated_lines = response.choices[0].message.content.strip().split('\n')
+        # Parse translations and update cache
+        for i, name in enumerate(needs_translation):
+            if i < len(translated_lines):
+                # Remove numbering if present (e.g., "1. Ananas" → "Ananas")
+                translated = translated_lines[i].strip()
+                translated = translated.split('. ', 1)[-1] if '. ' in translated else translated
+                translations[name] = translated
+                translation_cache[target_locale][name] = translated
+                logger.info(f"✅ '{name}' → '{translated}'")
+        return translations
+    except Exception as e:
+        logger.warning(f"⚠️ Batch translation failed: {e}")
+        # Return originals on failure
+        for name in needs_translation:
+            translations[name] = name
+        return translations
+async def translate_food_name(food_name: str, target_locale: str) -> str:
+    """
+    Translate single food name (uses batch function internally for caching).
+    Args:
+        food_name: Food name in English
+        target_locale: Target language code
+    Returns:
+        Translated food name or original if translation fails/not needed
+    """
+    result = await translate_food_names_batch([food_name], target_locale)
+    return result.get(food_name, food_name)
+async def translate_description(description: str, target_locale: str) -> str:
+    """
+    Translate food description to target language using OpenAI with caching.
+    Args:
+        description: Description in English
+        target_locale: Target language code
+    Returns:
+        Translated description or original if translation fails/not needed
+    """
+    # Skip translation if target is English or no OpenAI client
+    if target_locale == "en" or not openai_client or not OPENAI_API_KEY:
+        return description
+    # Simple cache key (hash of description + locale)
+    cache_key = f"desc_{hash(description)}_{target_locale}"
+    # Check if cached in locale cache
+    if target_locale not in translation_cache:
+        translation_cache[target_locale] = {}
+    if cache_key in translation_cache[target_locale]:
+        logger.info(f"💾 Description cache hit ({target_locale})")
+        return translation_cache[target_locale][cache_key]
+    # Get target language name
+    target_language = LANGUAGE_MAP.get(target_locale, target_locale)
+    try:
+        logger.info(f"🌐 Translating description to {target_language}")
+        response = await openai_client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {
+                    "role": "system",
+                    "content": f"You are a food description translator. Translate to {target_language}. Keep it natural and concise. Return ONLY the translation."
+                },
+                {
+                    "role": "user",
+                    "content": description
+                }
+            ],
+            max_tokens=100,
+            temperature=0.3,
+        )
+        translated = response.choices[0].message.content.strip()
+        # Cache the result
+        translation_cache[target_locale][cache_key] = translated
+        logger.info(f"✅ Description translated to {target_language}")
+        return translated
+    except Exception as e:
+        logger.warning(f"⚠️ Description translation failed: {e}")
+        return description
 # ==================== MULTI-MODEL FOOD RECOGNIZER ====================
 class MultiModelFoodRecognizer:
     """Production-ready multi-model ensemble for comprehensive food recognition."""
         """Main predict method - uses ensemble if available, fallback to primary."""
         return self.predict_ensemble(image, top_k)
+    def predict_ensemble(self, image: Image.Image, top_k: int = 10) -> Dict[str, Any]:
+        """Ensemble prediction using all available models with smart filtering."""
         if not self.is_loaded:
             raise RuntimeError("Models not loaded")
         all_predictions = []
         model_results = {}
+        # Get MORE predictions from all models (top 15 instead of 5)
+        predictions_per_model = 15
         for model_key in self.available_models:
+            predictions = self._predict_with_model(image, model_key, predictions_per_model)
             if predictions:
                 model_results[model_key] = predictions
                 all_predictions.extend(predictions)
         if not all_predictions:
             raise RuntimeError("No models produced valid predictions")
+        # NON-FOOD items that should be COMPLETELY FILTERED OUT
+        non_food_items = {
+            # Kitchen utensils & cookware
+            'plate', 'dish', 'bowl', 'cup', 'glass', 'mug', 'spoon', 'fork', 'knife',
+            'spatula', 'pan', 'pot', 'tray', 'napkin', 'table', 'cloth', 'placemat',
+            'chopsticks', 'straw', 'bottle', 'container', 'lid', 'wrapper', 'packaging',
+            'cutting board', 'grater', 'whisk', 'ladle', 'tongs', 'peeler', 'sieve',
+            'colander', 'mixer', 'blender', 'toaster', 'oven', 'microwave', 'fridge',
+            'freezer', 'dishwasher', 'sink', 'counter', 'shelf', 'cabinet', 'drawer',
+            'waffle iron', 'frying pan', 'frypan', 'skillet', 'saucepan', 'stockpot',
+            'baking sheet', 'baking pan', 'baking dish', 'loaf pan', 'muffin tin',
+            'rolling pin', 'measuring cup', 'measuring spoon', 'kitchen scale',
+            'bakery', 'bakeshop', 'bakehouse', 'restaurant', 'kitchen', 'dining room',
+            # Animals (NOT food!)
+            'dog', 'cat', 'bird', 'fish', 'horse', 'cow', 'pig', 'chicken',
+            'terrier', 'retriever', 'bulldog', 'poodle', 'beagle', 'dachshund',
+            'lobster', 'crab', 'shrimp', 'hunting dog', 'hyena', 'wolf', 'fox',
+            # Objects/Electronics
+            'joystick', 'controller', 'remote', 'phone', 'computer', 'mouse', 'keyboard',
+            'water jug', 'jug', 'pitcher', 'vase', 'flowerpot'
+        }
+        # Generic FOOD terms that should be deprioritized (but not removed)
+        generic_terms = {
+            'fruit', 'vegetable', 'food', 'meal', 'snack', 'dessert',
+            'salad', 'soup', 'drink', 'beverage', 'meat', 'fish', 'seafood',
+            'bread', 'pastry', 'cake', 'cookie', 'candy', 'chocolate'
+        }
         # Ensemble voting: weight by model priority and confidence
         food_scores = {}
+        filtered_count = 0
         for pred in all_predictions:
+            food_label_lower = pred["raw_label"].lower().replace("_", " ")
+            # FILTER OUT non-food items completely
+            is_non_food = any(non_food in food_label_lower for non_food in non_food_items)
+            if is_non_food:
+                filtered_count += 1
+                logger.info(f"🚫 Filtered non-food item: '{pred['raw_label']}'")
+                continue  # Skip this prediction entirely
             model_key = pred["model"]
             priority_weight = 1.0 / FOOD_MODELS[model_key]["priority"]  # Higher priority = lower number = higher weight
             confidence_weight = pred["confidence"]
+            # PENALTY for generic terms - reduce their score significantly
+            is_generic = any(generic in food_label_lower for generic in generic_terms)
+            # If it's a single-word generic term, penalize it even more
+            is_single_generic = food_label_lower in generic_terms
+            if is_single_generic:
+                combined_score = priority_weight * confidence_weight * 0.1  # 90% penalty
+            elif is_generic:
+                combined_score = priority_weight * confidence_weight * 0.5  # 50% penalty
+            else:
+                combined_score = priority_weight * confidence_weight  # Full score for specific items
             food_name = pred["raw_label"]
             if food_name not in food_scores:
                 food_scores[food_name] = {
                     "total_score": 0,
                     "count": 0,
                     "best_prediction": pred,
+                    "models": [],
+                    "is_generic": is_generic
                 }
             food_scores[food_name]["total_score"] += combined_score
             food_scores[food_name]["count"] += 1
             food_scores[food_name]["models"].append(model_key)
             # Keep the prediction with highest confidence as representative
             if pred["confidence"] > food_scores[food_name]["best_prediction"]["confidence"]:
                 food_scores[food_name]["best_prediction"] = pred
+        if filtered_count > 0:
+            logger.info(f"✅ Filtered out {filtered_count} non-food items")
         # Sort by ensemble score
         sorted_foods = sorted(
+            food_scores.items(),
+            key=lambda x: x[1]["total_score"],
             reverse=True
         )
+        # Format final results - return MORE alternatives (up to top_k)
         final_predictions = []
+        for food_name, data in sorted_foods[:top_k * 2]:  # Get double to have enough after filtering
             pred = data["best_prediction"].copy()
             pred["ensemble_score"] = data["total_score"]
             pred["model_count"] = data["count"]
             pred["contributing_models"] = data["models"]
+            pred["is_generic"] = data["is_generic"]
             final_predictions.append(pred)
+        # Remove duplicates AND non-food items (double check)
+        filtered_predictions = []
+        seen_labels = set()
+        for pred in final_predictions:
+            label_lower = pred["raw_label"].lower().replace("_", " ").strip()
+            # DOUBLE CHECK: Filter non-food items again
+            is_non_food = any(non_food in label_lower for non_food in non_food_items)
+            if is_non_food:
+                continue  # Skip non-food items
+            # Skip if we've already seen very similar label
+            if label_lower not in seen_labels:
+                filtered_predictions.append(pred)
+                seen_labels.add(label_lower)
+            if len(filtered_predictions) >= top_k:
+                break
+        # Primary result - prefer specific over generic AND high confidence
+        primary = filtered_predictions[0] if filtered_predictions else {
             "label": "Unknown Food",
             "raw_label": "unknown",
             "confidence": 0.0,
             "ensemble_score": 0.0,
             "model_count": 0,
+            "contributing_models": [],
+            "is_generic": False
         }
+        # QUALITY CHECK: If primary confidence is < 10%, try to find better alternative
+        MIN_CONFIDENCE = 0.10  # 10%
+        if primary.get("confidence", 0) < MIN_CONFIDENCE and len(filtered_predictions) > 1:
+            logger.warning(f"⚠️ Low confidence ({primary['confidence']:.1%}) for '{primary['label']}', checking alternatives...")
+            # Find first alternative with higher confidence
+            for i, pred in enumerate(filtered_predictions[1:], 1):
+                if pred.get("confidence", 0) >= MIN_CONFIDENCE / 2:  # At least 5%
+                    filtered_predictions[0], filtered_predictions[i] = filtered_predictions[i], filtered_predictions[0]
+                    primary = filtered_predictions[0]
+                    logger.info(f"🔄 Swapped low-confidence primary with better alternative: {primary['label']} ({primary['confidence']:.1%})")
+                    break
+        # If primary is generic but we have specific alternatives, swap them
+        if primary.get("is_generic") and len(filtered_predictions) > 1:
+            for i, pred in enumerate(filtered_predictions[1:], 1):
+                if not pred.get("is_generic"):
+                    # Swap primary with this specific prediction
+                    filtered_predictions[0], filtered_predictions[i] = filtered_predictions[i], filtered_predictions[0]
+                    primary = filtered_predictions[0]
+                    logger.info(f"🔄 Swapped generic primary with specific: {primary['label']}")
+                    break
         return {
             "success": True,
             "label": primary["label"],
             "confidence": primary["confidence"],
             "primary_label": primary["raw_label"],
             "ensemble_score": primary.get("ensemble_score", 0),
+            "alternatives": filtered_predictions[1:],  # Now returns up to 9 alternatives
             "model_results": model_results,
             "system_info": {
                 "available_models": self.available_models,
     logger.info(f"🖥️  Device: {device.upper()}")
     logger.info(f"📊 Models: {len(recognizer.available_models)} active models")
     logger.info(f"🎯 Total Food Categories: {sum(FOOD_MODELS[m]['classes'] for m in recognizer.available_models)}")
+    logger.info(f"🌐 Translations: {'✅ Enabled' if openai_client else '❌ Disabled'}")
     logger.info("=" * 60)
     yield
     # Shutdown
 device = select_device()
 recognizer = MultiModelFoodRecognizer(device)
+# Initialize OpenAI client BEFORE FastAPI app
+if OPENAI_API_KEY:
+    try:
+        openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
+        logger.info(f"✅ OpenAI client initialized (key: {OPENAI_API_KEY[:20]}...)")
+    except Exception as e:
+        logger.warning(f"⚠️ OpenAI client initialization failed: {e}")
+        openai_client = None
+else:
+    logger.warning("⚠️ OpenAI API key not found - translations disabled")
 # Create FastAPI app
 app = FastAPI(
     title="AI Food Recognition API",
     }
 @app.post("/api/nutrition/analyze-food")
+async def analyze_food_nutrition(request: Request, file: UploadFile = File(None)):
     """
+    Analyze food image or manual entry for Next.js frontend.
+    Supports two modes:
+    1. Image upload: AI recognition + nutrition lookup
+    2. Manual entry: Direct nutrition lookup by food name
+    Returns nutrition-focused response format with translations.
     """
     try:
+        # Parse form data
+        form_data = await request.form()
+        manual_input = form_data.get("manualInput", "false").lower() == "true"
+        locale = form_data.get("locale", "en")  # Get user's language preference
+        logger.info(f"📥 Request received - Mode: {'Manual' if manual_input else 'Image'}, Locale: {locale}")
+        # MODE 1: Manual food entry (from alternatives or manual input)
+        if manual_input:
+            food_name = form_data.get("manualFoodName")
+            serving_size = form_data.get("manualServingSize", "100")
+            serving_unit = form_data.get("manualServingUnit", "g")
+            description = form_data.get("manualDescription", "")
+            if not food_name:
+                raise HTTPException(status_code=400, detail="manualFoodName is required for manual entry")
+            logger.info(f"🍽️ Manual nutrition lookup: {food_name} ({serving_size}{serving_unit})")
+            # Direct nutrition API lookup
+            nutrition_data = await get_nutrition_from_apis(food_name)
+            if not nutrition_data or nutrition_data.get("calories", 0) == 0:
+                raise HTTPException(
+                    status_code=404,
+                    detail=f"Failed to retrieve nutrition information for manual entry"
+                )
+            source = nutrition_data.get("source", "Unknown")
+            logger.info(f"✅ Manual lookup: {food_name} | Nutrition: {source}")
+            # Translate food name and description
+            translated_name = await translate_food_name(food_name, locale)
+            base_description = description or f"Manual entry: {food_name}"
+            translated_description = await translate_description(base_description, locale)
+            # Return manual entry format
+            return JSONResponse(content={
+                "data": {
+                    "label": translated_name,
+                    "confidence": 1.0,  # Manual entry has 100% confidence
+                    "nutrition": {
+                        "calories": nutrition_data["calories"],
+                        "protein": nutrition_data["protein"],
+                        "carbs": nutrition_data["carbs"],
+                        "fat": nutrition_data["fat"]
+                    },
+                    "servingSize": serving_size,
+                    "servingUnit": serving_unit,
+                    "description": translated_description,
+                    "alternatives": [],  # No alternatives for manual entry
+                    "source": f"{source} Database",
+                    "isManualEntry": True
+                }
+            })
+        # MODE 2: Image upload (AI recognition)
+        else:
+            if not file:
+                raise HTTPException(status_code=400, detail="File is required for image analysis")
+            logger.info(f"🍽️ Image analysis request: {file.filename}")
+            # Validate and process image
+            image = await validate_and_read_image(file)
+            # Step 1: AI Model Prediction (request top 10 for more alternatives)
+            results = recognizer.predict(image, top_k=10)
+            # Step 2: API Nutrition Lookup
+            nutrition_data = await get_nutrition_from_apis(results["primary_label"])
+            # Log result
+            confidence_pct = f"{results['confidence']:.1%}"
+            source = nutrition_data.get("source", "Unknown")
+            logger.info(f"✅ Prediction: {results['label']} ({confidence_pct}) | Nutrition: {source}")
+            # BATCH TRANSLATION OPTIMIZATION: Translate all food names at once
+            if locale != "en" and openai_client:
+                # Collect all names to translate (primary + alternatives)
+                names_to_translate = [results["label"]]
+                if results.get("alternatives"):
+                    names_to_translate.extend([
+                        alt.get("label", alt.get("raw_label", ""))
+                        for alt in results["alternatives"]
+                    ])
+                # Single API call for all translations
+                translations = await translate_food_names_batch(names_to_translate, locale)
+                # Apply translations
+                translated_name = translations.get(results["label"], results["label"])
+                # Translate description
+                base_description = f"{results['label']} identified with {int(results['confidence'] * 100)}% confidence"
+                translated_description = await translate_description(base_description, locale)
+                # Map alternatives with translations
+                translated_alternatives = []
+                if results.get("alternatives"):
+                    for alt in results["alternatives"]:
+                        alt_name = alt.get("label", alt.get("raw_label", ""))
+                        translated_alternatives.append({
+                            **alt,
+                            "label": translations.get(alt_name, alt_name),
+                            "original_label": alt_name
+                        })
+            else:
+                # No translation needed
+                translated_name = results["label"]
+                translated_description = f"{results['label']} identified with {int(results['confidence'] * 100)}% confidence"
+                translated_alternatives = results["alternatives"]
+            # Return frontend-expected format
+            return JSONResponse(content={
+                "data": {
+                    "label": translated_name,
+                    "confidence": results["confidence"],
+                    "description": translated_description,  # Translated description
+                    "nutrition": {
+                        "calories": nutrition_data["calories"],
+                        "protein": nutrition_data["protein"],
+                        "carbs": nutrition_data["carbs"],
+                        "fat": nutrition_data["fat"]
+                    },
+                    "alternatives": translated_alternatives,
+                    "source": f"AI Recognition + {source} Database",
+                    "isManualEntry": False,
+                    "locale": locale  # Return locale for debugging
+                }
+            })
     except HTTPException:
         raise
     except Exception as e:
         # Validate and process image
         image = await validate_and_read_image(file)
+        # Step 1: AI Model Prediction (request top 10 for more alternatives)
+        results = recognizer.predict(image, top_k=10)
         # Step 2: API Nutrition Lookup
         nutrition_data = await get_nutrition_from_apis(results["primary_label"])

requirements.txt CHANGED Viewed

@@ -21,6 +21,9 @@ python-multipart>=0.0.6
 # Async HTTP client for USDA API
 aiohttp>=3.8.0
 # Utilities
 python-dotenv>=1.0.0

 # Async HTTP client for USDA API
 aiohttp>=3.8.0
+# OpenAI for translations
+openai>=1.0.0
 # Utilities
 python-dotenv>=1.0.0