Enoch Jason J committed
Commit 1ab6f41 · 1 Parent(s): bab74ff

Finalize Hub-based deployment strategy

Files changed (3):
  1. Dockerfile +2 -7
  2. app.py +5 -5
  3. download_models.py +2 -2
Dockerfile CHANGED
@@ -10,23 +10,18 @@ COPY download_models.py .
 RUN pip install --no-cache-dir -r requirements_local.txt
 
 # --- Pre-download and Cache Models ---
-# The RUN command securely accesses the HF_TOKEN secret.
+# The RUN command securely accesses the HF_TOKEN secret to download all models.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
 
 # --- Copy Application Files ---
-# FIX: Standardized to use 'main.py'
+# We only need to copy the main application file now.
 COPY app.py .
 
-# FIX: Copy the local LoRA adapter from the build context.
-# This assumes you've moved 'gemma-grammar-lora' into your project folder.
-COPY gemma-grammar-lora /app/gemma-grammar-lora
-
 # Expose the port the app runs on
 EXPOSE 8000
 
 # Command to run the application
-# FIX: Standardized to use 'app:app'
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
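A note for readers reproducing this build: --mount=type=secret requires BuildKit and only works if the secret is supplied at build time, for example with docker build --secret id=HUGGING_FACE_HUB_TOKEN,src=./hf_token.txt . (the token file name here is hypothetical). Also, --mount=type=cache keeps the downloads in BuildKit's build cache rather than committing them to an image layer, so it is worth verifying that the models really are resolvable inside the final image. A minimal sketch of such a check, run inside the built container:

# Hypothetical post-build check (not part of this commit): loading with
# local_files_only=True makes from_pretrained fail fast instead of silently
# re-downloading from the Hub, proving the files were baked into the image.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "unsloth/gemma-2b-it",
    local_files_only=True,  # raises an error if the files are not in the local cache
)
print("Tokenizer resolved from the image's local cache.")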
app.py CHANGED
@@ -11,7 +11,7 @@ from peft import PeftModel
 # --- Model Paths ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-# FIX: This now correctly points to your model on the Hugging Face Hub.
+# This correctly points to your model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 # --- Global variables for models ---
@@ -24,8 +24,8 @@ device = "cpu"
 print("--- Starting Model Loading ---")
 
 try:
-    # Models are loaded from the pre-downloaded cache inside the image.
-    # No token is needed at runtime.
+    # Models are loaded from the pre-downloaded cache in the image.
+    # No token is needed at runtime because the files are already cached.
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
     gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
     gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
@@ -65,7 +65,7 @@ class CorrectionResponse(BaseModel):
     original_text: str
     corrected_text: str
 
-# --- Helper Functions (No changes needed) ---
+# --- Helper Functions ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
@@ -88,7 +88,7 @@ def correct_gender_rules(text: str) -> str:
         text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
 
-# --- API Endpoints (No changes needed) ---
+# --- API Endpoints ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:
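These hunks show only the gender model being loaded; the grammar model's load is implied by BASE_MODEL_PATH and LORA_ADAPTER_PATH. A minimal sketch of that pattern, assuming app.py mirrors the gender-model code (variable names are illustrative, not the commit's exact code):

# Sketch: attach the Hub-hosted LoRA adapter to the Gemma base model via PEFT.
# Both repos are already in the image's cache, so no token is needed here.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_PATH = "unsloth/gemma-2b-it"
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
device = "cpu"

grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH).to(device)
# PeftModel wraps the base model; generate() then runs with the LoRA weights applied.
grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH)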
download_models.py CHANGED
@@ -7,7 +7,7 @@ from peft import PeftModel
 
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-# This now points to your fine-tuned model on the Hugging Face Hub.
+# This correctly points to your fine-tuned model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
@@ -35,7 +35,7 @@ print("✅ Base model downloaded.")
 
 # 3. Download Your Fine-Tuned LoRA Adapter
 print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
-# This step downloads the adapter and links it to the base model, caching it.
+# This step downloads your private adapter and links it to the base model, caching it.
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
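Once the container is up, the /correct_grammar endpoint can be smoke-tested directly. A hypothetical example follows: the response fields come from CorrectionResponse above, but the request field name "text" is an assumption, since CorrectionRequest's fields are not visible in this diff.

# Hypothetical smoke test for the deployed service. The endpoint path and
# response model are from app.py; the request body's "text" key is an assumed
# field name.
import requests

resp = requests.post(
    "http://localhost:8000/correct_grammar",
    json={"text": "she go to school yesterday"},
)
resp.raise_for_status()
print(resp.json())  # expected keys: "original_text", "corrected_text"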