Spaces: Build error
Enoch Jason J committed
Commit · 1ab6f41
Parent(s): bab74ff
Finalize Hub-based deployment strategy
- Dockerfile +2 -7
- app.py +5 -5
- download_models.py +2 -2
Dockerfile
CHANGED
@@ -10,23 +10,18 @@ COPY download_models.py .
 RUN pip install --no-cache-dir -r requirements_local.txt
 
 # --- Pre-download and Cache Models ---
-# The RUN command securely accesses the HF_TOKEN secret.
+# The RUN command securely accesses the HF_TOKEN secret to download all models.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
 
 # --- Copy Application Files ---
-#
+# We only need to copy the main application file now.
 COPY app.py .
 
-# FIX: Copy the local LoRA adapter from the build context.
-# This assumes you've moved 'gemma-grammar-lora' into your project folder.
-COPY gemma-grammar-lora /app/gemma-grammar-lora
-
 # Expose the port the app runs on
 EXPOSE 8000
 
 # Command to run the application
-# FIX: Standardized to use 'app:app'
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
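For reference, the RUN step above uses two BuildKit mounts: the secret mount exposes the token at /run/secrets/HUGGING_FACE_HUB_TOKEN only for that single instruction, so it never lands in an image layer, and the cache mount reuses /root/.cache/huggingface across builds. Outside Spaces, the same secret can be supplied with `DOCKER_BUILDKIT=1 docker build --secret id=HUGGING_FACE_HUB_TOKEN,src=token.txt .`. Inside the container, download_models.py picks the token up from the environment; below is a minimal sketch of that pattern, reconstructed from the hunks further down, not the exact file:

# Sketch: how the build step hands the secret to Python. Names are taken
# from the download_models.py hunks below; everything else is assumed.
import os
from transformers import AutoModelForCausalLM

BASE_MODEL_PATH = "unsloth/gemma-2b-it"

# The RUN line exports the secret as an env var for this one process only.
hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")

# Passing token= lets from_pretrained fetch gated or private repos; the
# downloaded files land in the cache mounted at /root/.cache/huggingface.
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH, token=hf_token)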
app.py
CHANGED
@@ -11,7 +11,7 @@ from peft import PeftModel
 # --- Model Paths ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-#
+# This correctly points to your model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 # --- Global variables for models ---
@@ -24,8 +24,8 @@ device = "cpu"
 print("--- Starting Model Loading ---")
 
 try:
-    # Models are loaded from the pre-downloaded cache
-    # No token is needed at runtime.
+    # Models are loaded from the pre-downloaded cache in the image.
+    # No token is needed at runtime because the files are already cached.
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
     gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
     gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
@@ -65,7 +65,7 @@ class CorrectionResponse(BaseModel):
     original_text: str
     corrected_text: str
 
-# --- Helper Functions
+# --- Helper Functions ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
@@ -88,7 +88,7 @@ def correct_gender_rules(text: str) -> str:
     text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
 
-# --- API Endpoints
+# --- API Endpoints ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:
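The startup block that builds the grammar model is not visible in these hunks, but given the `from peft import PeftModel` import and the parallel step in download_models.py, it presumably attaches the Hub-hosted adapter to the cached base model along these lines (a sketch, assuming the variable names used elsewhere in app.py):

# Sketch: assemble base model + LoRA adapter at startup. PeftModel resolves
# the adapter from the local Hugging Face cache when it was downloaded at
# build time; otherwise it falls back to the Hub.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_PATH = "unsloth/gemma-2b-it"
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
device = "cpu"

grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_PATH)
grammar_model = PeftModel.from_pretrained(base, LORA_ADAPTER_PATH).to(device)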
download_models.py
CHANGED
@@ -7,7 +7,7 @@ from peft import PeftModel
 
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-# This
+# This correctly points to your fine-tuned model on the Hugging Face Hub.
 LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
@@ -35,7 +35,7 @@ print("✅ Base model downloaded.")
 
 # 3. Download Your Fine-Tuned LoRA Adapter
 print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
-# This step downloads
+# This step downloads your private adapter and links it to the base model, caching it.
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
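Once the image builds and uvicorn is serving on port 8000, the endpoint defined in app.py can be smoke-tested from the host. The request model's field name is not shown in the hunks, so `text` below is an assumption; the response shape follows CorrectionResponse:

# Hypothetical smoke test against the running container (port 8000 mapped).
import requests

resp = requests.post(
    "http://localhost:8000/correct_grammar",
    json={"text": "she go to school yesterday"},  # request field name assumed
)
resp.raise_for_status()
print(resp.json())  # expected keys: original_text, corrected_text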