Enoch Jason J committed
Commit 1ab6f41 · 1 Parent(s): bab74ff

Finalize Hub-based deployment strategy

Files changed (3):
  1. Dockerfile +2 -7
  2. app.py +5 -5
  3. download_models.py +2 -2
Dockerfile CHANGED
@@ -10,23 +10,18 @@ COPY download_models.py .
 RUN pip install --no-cache-dir -r requirements_local.txt
 
 # --- Pre-download and Cache Models ---
-# The RUN command securely accesses the HF_TOKEN secret.
+# The RUN command securely accesses the HF_TOKEN secret to download all models.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
 
 # --- Copy Application Files ---
-# FIX: Standardized to use 'main.py'
+# We only need to copy the main application file now.
 COPY app.py .
 
-# FIX: Copy the local LoRA adapter from the build context.
-# This assumes you've moved 'gemma-grammar-lora' into your project folder.
-COPY gemma-grammar-lora /app/gemma-grammar-lora
-
 # Expose the port the app runs on
 EXPOSE 8000
 
 # Command to run the application
-# FIX: Standardized to use 'app:app'
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
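A note for readers reproducing this build: --mount=type=secret requires BuildKit and only works if the secret is supplied at build time, for example with docker build --secret id=HUGGING_FACE_HUB_TOKEN,src=./hf_token.txt . (the token file name here is hypothetical). Also, --mount=type=cache keeps the downloads in BuildKit's build cache rather than committing them to an image layer, so it is worth verifying that the models really are resolvable inside the final image. A minimal sketch of such a check, run inside the built container:

# Hypothetical post-build check (not part of this commit): loading with
# local_files_only=True makes from_pretrained fail fast instead of silently
# re-downloading from the Hub, proving the files were baked into the image.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "unsloth/gemma-2b-it",
    local_files_only=True,  # raises an error if the files are not in the local cache
)
print("Tokenizer resolved from the image's local cache.")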
app.py CHANGED
@@ -11,7 +11,7 @@ from peft import PeftModel
 # --- Model Paths ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-# FIX: This now correctly points to your model on the Hugging Face Hub.
+# This correctly points to your model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 # --- Global variables for models ---
@@ -24,8 +24,8 @@ device = "cpu"
 print("--- Starting Model Loading ---")
 
 try:
-    # Models are loaded from the pre-downloaded cache inside the image.
-    # No token is needed at runtime.
+    # Models are loaded from the pre-downloaded cache in the image.
+    # No token is needed at runtime because the files are already cached.
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
     gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
     gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
@@ -65,7 +65,7 @@ class CorrectionResponse(BaseModel):
     original_text: str
     corrected_text: str
 
-# --- Helper Functions (No changes needed) ---
+# --- Helper Functions ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
@@ -88,7 +88,7 @@ def correct_gender_rules(text: str) -> str:
         text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
 
-# --- API Endpoints (No changes needed) ---
+# --- API Endpoints ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:
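These hunks show only the gender model being loaded; the grammar model's load is implied by BASE_MODEL_PATH and LORA_ADAPTER_PATH. A minimal sketch of that pattern, assuming app.py mirrors the gender-model code (variable names are illustrative, not the commit's exact code):

# Sketch: attach the Hub-hosted LoRA adapter to the Gemma base model via PEFT.
# Both repos are already in the image's cache, so no token is needed here.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_PATH = "unsloth/gemma-2b-it"
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
device = "cpu"

grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH).to(device)
# PeftModel wraps the base model; generate() then runs with the LoRA weights applied.
grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH)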
download_models.py CHANGED
@@ -7,7 +7,7 @@ from peft import PeftModel
 
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-# This now points to your fine-tuned model on the Hugging Face Hub.
+# This correctly points to your fine-tuned model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
@@ -35,7 +35,7 @@ print("✅ Base model downloaded.")
 
 # 3. Download Your Fine-Tuned LoRA Adapter
 print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
-# This step downloads the adapter and links it to the base model, caching it.
+# This step downloads your private adapter and links it to the base model, caching it.
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
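Once the container is up, the /correct_grammar endpoint can be smoke-tested directly. A hypothetical example follows: the response fields come from CorrectionResponse above, but the request field name "text" is an assumption, since CorrectionRequest's fields are not visible in this diff.

# Hypothetical smoke test for the deployed service. The endpoint path and
# response model are from app.py; the request body's "text" key is an assumed
# field name.
import requests

resp = requests.post(
    "http://localhost:8000/correct_grammar",
    json={"text": "she go to school yesterday"},
)
resp.raise_for_status()
print(resp.json())  # expected keys: "original_text", "corrected_text"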