Spaces:
Paused
Paused
Oleh Kuznetsov
committed on
Commit
·
07f77e4
1
Parent(s):
f7972c6
fixup! feat(rec): Fix prompt storage
Browse files
app.py
CHANGED
|
@@ -14,6 +14,7 @@ VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")
|
|
| 14 |
VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
|
| 15 |
VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
|
| 17 |
|
| 18 |
# -------------------------------- HELPERS -------------------------------------
|
| 19 |
def load_prompt(path: Path) -> str:
|
|
@@ -40,6 +41,7 @@ local_llm = LLM(
|
|
| 40 |
gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
|
| 41 |
hf_token=HF_TOKEN,
|
| 42 |
enforce_eager=True,
|
|
|
|
| 43 |
)
|
| 44 |
|
| 45 |
json_schema = StructuredQueryRewriteResponse.model_json_schema()
|
|
|
|
| 14 |
VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
|
| 15 |
VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 17 |
+
VLLM_DTYPE = os.getenv("VLLM_DTYPE")
|
| 18 |
|
| 19 |
# -------------------------------- HELPERS -------------------------------------
|
| 20 |
def load_prompt(path: Path) -> str:
|
|
|
|
| 41 |
gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
|
| 42 |
hf_token=HF_TOKEN,
|
| 43 |
enforce_eager=True,
|
| 44 |
+
dtype=VLLM_DTYPE,
|
| 45 |
)
|
| 46 |
|
| 47 |
json_schema = StructuredQueryRewriteResponse.model_json_schema()
|