import torch
import re
import os
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# --- Model identifiers ---
# Passed verbatim to the from_pretrained() loaders further down; the original
# author describes them as identifiers for already-cached models.
GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"

# --- Shared runtime state ---
# Target device for tokenized inputs and for the gender model.
device = "cpu"
# Placeholders filled in by the loading step; the endpoints answer 503 while
# the model they need is still None.
grammar_model = grammar_tokenizer = None
gender_model = gender_tokenizer = None

print("--- Starting Model Loading From Cache ---")

# Each model is loaded in its own try/except so that a failure in one does not
# throw away (or skip) the other: the two endpoints check their own model
# independently, so one broken model should only take down its own endpoint.

# 1. Load the fine-tuned grammar model through Unsloth.
# model_name points at the LoRA adapter repo; per the original author's note,
# Unsloth loads the model and applies the adapter in this single call.
try:
    print(f"Loading grammar model and adapter: {LORA_ADAPTER_PATH}")
    grammar_model, grammar_tokenizer = FastLanguageModel.from_pretrained(
        model_name=LORA_ADAPTER_PATH,
        dtype=torch.float32,
        load_in_4bit=False,  # CPU mode
    )
    print("✅ Your fine-tuned grammar model is ready!")
except Exception as e:
    # Startup boundary: report the failure and leave the endpoint disabled
    # (it answers 503) instead of crashing the whole service.
    print(f"❌ Critical error during model loading: {e}")
    grammar_model = None
    grammar_tokenizer = None

# 2. Load the gender verifier model.
try:
    print(f"Loading gender model: {GENDER_MODEL_PATH}")
    gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
    gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
    print("✅ Gender verifier model loaded successfully!")
except Exception as e:
    print(f"❌ Critical error during model loading: {e}")
    # Reset the tokenizer as well so a half-loaded pair is never left behind.
    gender_model = None
    gender_tokenizer = None

print("--- Model Loading Complete ---")

# --- FastAPI Application Setup ---
# Single application object; the route decorators below register on it.
app = FastAPI(title="Text Correction API")

# Request body accepted by both correction endpoints.
class CorrectionRequest(BaseModel):
    # Raw text to be corrected; inserted as-is into the model prompt.
    text: str

# Response body returned by both correction endpoints.
class CorrectionResponse(BaseModel):
    # The text exactly as it was received in the request.
    original_text: str
    # The model's output after the endpoint's post-processing.
    corrected_text: str

# --- API Endpoints ---
@app.post("/correct_grammar", response_model=CorrectionResponse)
async def handle_grammar_correction(request: CorrectionRequest):
    """Correct the grammar of ``request.text`` with the fine-tuned grammar model.

    The text is wrapped in the ``Prompt: ...\\nResponse:`` template, up to 256
    new tokens are generated with sampling disabled, and only the newly
    generated tokens are decoded and returned.

    Args:
        request: Request body carrying the text to correct.

    Returns:
        CorrectionResponse holding the input text and the model's correction.

    Raises:
        HTTPException: 503 when the grammar model is not loaded.
    """
    # Explicit None checks: the globals are None exactly when loading failed,
    # and this avoids depending on the truthiness of a model object.
    if grammar_model is None or grammar_tokenizer is None:
        raise HTTPException(status_code=503, detail="Grammar model is not available.")

    # NOTE(review): generate() is a blocking, CPU-bound call inside an
    # ``async def`` handler, so it presumably stalls the event loop while it
    # runs — confirm whether this should be offloaded to a worker thread.
    prompt_text = request.text
    input_text = f"Prompt: {prompt_text}\nResponse:"
    inputs = grammar_tokenizer(input_text, return_tensors="pt").to(device)

    output_ids = grammar_model.generate(**inputs, max_new_tokens=256, do_sample=False)

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them instead of splitting the full text on "Response:", which
    # truncated any correction that itself contained "Response:".
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0][prompt_length:]
    corrected = grammar_tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return CorrectionResponse(original_text=prompt_text, corrected_text=corrected)

# Regex safety net applied to the model output: (compiled pattern, noun).
# Group 1 is the lead-in that is kept verbatim; group 2 is the noun that is
# swapped. Compiled once here instead of on every request.
_GENDER_SAFETY_NET = tuple(
    (re.compile(pattern, re.IGNORECASE), noun)
    for pattern, noun in (
        (r"\b(her )(wife)\b", "husband"),
        (r"\b(his )(husband)\b", "wife"),
        (r"\b(he is a )(girl)\b", "boy"),
        (r"\b(she is a )(boy)\b", "girl"),
    )
)


def _match_case(word, like):
    """Return *word* cased like *like* (all caps, capitalized, or unchanged)."""
    if like.isupper():
        return word.upper()
    if like[:1].isupper():
        return word.capitalize()
    return word


def _apply_gender_safety_net(text):
    """Apply every safety-net rewrite to *text*, preserving the original casing."""
    for pattern, noun in _GENDER_SAFETY_NET:
        # Bind noun as a default so each replacement uses its own noun.
        text = pattern.sub(
            lambda m, noun=noun: m.group(1) + _match_case(noun, m.group(2)),
            text,
        )
    return text


@app.post("/correct_gender", response_model=CorrectionResponse)
async def handle_gender_correction(request: CorrectionRequest):
    """Rewrite ``request.text`` with corrected grammar and gender agreement.

    The text is placed in an instruction prompt, up to 256 new tokens are
    generated with sampling disabled, and only the newly generated tokens are
    decoded. Surrounding whitespace and double quotes are stripped, then a
    small set of regex rewrites is applied as a safety net.

    Args:
        request: Request body carrying the text to correct.

    Returns:
        CorrectionResponse holding the input text and the corrected sentence.

    Raises:
        HTTPException: 503 when the gender model is not loaded.
    """
    # Explicit None checks: the globals are None exactly when loading failed.
    if gender_model is None or gender_tokenizer is None:
        raise HTTPException(status_code=503, detail="Gender model is not available.")

    # NOTE(review): generate() is a blocking, CPU-bound call inside an
    # ``async def`` handler, so it presumably stalls the event loop while it
    # runs — confirm whether this should be offloaded to a worker thread.
    prompt_text = request.text
    input_text = f"Prompt: Please rewrite the sentence with correct grammar and gender. Output ONLY the corrected sentence:\n{prompt_text}\nResponse:"
    inputs = gender_tokenizer(input_text, return_tensors="pt").to(device)
    output_ids = gender_model.generate(**inputs, max_new_tokens=256, do_sample=False)

    # Decode only the tokens that follow the prompt; splitting the full text
    # on "Response:" truncated outputs that themselves contained "Response:".
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0][prompt_length:]
    output_text = gender_tokenizer.decode(generated_ids, skip_special_tokens=True)

    cleaned_from_model = output_text.strip().strip('"')

    # The earlier version replaced matches with fixed lowercase text, so
    # "Her wife" became "her husband"; the helper keeps the original casing.
    cleaned_from_model = _apply_gender_safety_net(cleaned_from_model)

    return CorrectionResponse(original_text=prompt_text, corrected_text=cleaned_from_model)

@app.get("/")
def read_root():
    """Return a fixed status payload showing the API process is up.

    The reply is static; it does not look at whether either model loaded.
    """
    return dict(status="Text Correction API is running.")