Nexari-Research committed
Commit 2f7603d · verified · 1 Parent(s): 59903c2

Update chat_model.py

Files changed (1)
  1. chat_model.py (+26 −13)
chat_model.py CHANGED
@@ -1,4 +1,6 @@
-import os, asyncio, logging
+import os
+import logging
+import asyncio
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
@@ -9,19 +11,30 @@ model = None
 REPO_ID = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
 FILENAME = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
 
-def load_model():
+def download_if_needed(local_dir: str):
+    os.makedirs(local_dir, exist_ok=True)
+    local_path = os.path.join(local_dir, FILENAME)
+    if os.path.exists(local_path):
+        logger.info(f"Chat model already present: {local_path}")
+        return local_path
+    logger.info(f"Downloading chat model to {local_dir} ...")
+    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir=local_dir)
+    logger.info(f"Downloaded chat: {path}")
+    return path
+
+def load_model(local_dir: str = None):
     global model
-    os.makedirs(BASE_DIR, exist_ok=True)
-    path = hf_hub_download(REPO_ID, FILENAME, local_dir=BASE_DIR)
-    model = Llama(
-        model_path=path,
-        n_ctx=4096,
-        n_threads=os.cpu_count(),
-        n_batch=256,
-        verbose=False
-    )
-    logger.info("Chat model ready")
-    return model
+    if not local_dir:
+        local_dir = BASE_DIR
+    try:
+        model_path = download_if_needed(local_dir)
+        model = Llama(model_path=model_path, n_ctx=2048, verbose=False)
+        logger.info("Chat model loaded")
+        return model
+    except Exception as e:
+        logger.exception(f"Failed to load chat model: {e}")
+        model = None
+        raise
 
 async def load_model_async():
     return await asyncio.to_thread(load_model)
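
For reference, a minimal usage sketch of the refactored loader (not part of the commit; the driver script, prompt text, and token budget are illustrative assumptions, and it presumes chat_model.py is importable with BASE_DIR and logger defined above the hunks shown):

    # Hypothetical driver script: load the model off the event loop, then run one completion.
    import asyncio

    from chat_model import load_model_async

    async def main():
        # First run downloads the .gguf into BASE_DIR; later runs reuse the local file.
        llm = await load_model_async()
        # llama_cpp.Llama instances are callable and return an OpenAI-style completion dict.
        out = llm("Q: What does GGUF stand for? A:", max_tokens=32)
        print(out["choices"][0]["text"])

    if __name__ == "__main__":
        asyncio.run(main())

Because load_model_async delegates to asyncio.to_thread, the blocking download and Llama initialization run in a worker thread, so the caller's event loop stays responsive while the model loads.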