from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from pathlib import Path
import os
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
# Load environment variables from .env
load_dotenv()
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
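# Assumes a .env file defining HUGGINGFACE_HUB_TOKEN=<your token>. For a
# public repo like the one downloaded below the token is optional, but it
# avoids anonymous rate limits.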
app = FastAPI(title="Company Assistant API")
# Local model path
MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"
# Download the model if it is not present locally
if not MODEL_PATH.exists():
    print("Downloading the Llama 3.2-1B GGUF model...")
    # local_dir places the .gguf file directly in MODEL_DIR, so the
    # exists() check above finds it on later runs (cache_dir would store
    # it under Hugging Face's nested cache layout instead)
    MODEL_PATH = Path(hf_hub_download(
        repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
        filename="llama-3.2-1b-instruct-q4_k_m.gguf",
        local_dir=str(MODEL_DIR),
        token=HUGGINGFACE_HUB_TOKEN,
    ))
    print("Model downloaded!")
# Initialize the model (n_ctx=2048 is an assumed context size; the
# llama_cpp default of 512 is often too small for chat prompts)
llm = Llama(model_path=str(MODEL_PATH), n_ctx=2048)
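# NOTE: Llama 3.2 Instruct is a chat-tuned model. The /chat endpoint below
# uses the raw completion interface; as an alternative, llama_cpp also
# exposes llm.create_chat_completion(messages=[...]), which applies the
# model's chat template and usually follows instructions better.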
# Request body model
class Message(BaseModel):
    text: str
@app.get("/")
def root():
    return {"message": "Company Assistant API is running!"}
@app.post("/chat")
def chat(message: Message):
    result = llm(prompt=message.text, max_tokens=200)
    # llama_cpp returns an OpenAI-style completion dict; the generated
    # text lives in choices[0]["text"]
    return {"reply": result["choices"][0]["text"]}
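# Example usage (assuming this file is app.py and is served with uvicorn):
#   uvicorn app:app --host 0.0.0.0 --port 8000
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Hello, who are you?"}'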