File size: 1,326 Bytes
45cc284
d9da646
 
4c7d562
611f683
 
b2bc09b
611f683
 
 
 
45cc284
4c7d562
45cc284
4c7d562
b2bc09b
4c7d562
 
b2bc09b
 
 
 
4c7d562
 
b2bc09b
 
 
 
 
 
 
4c7d562
 
 
 
 
 
d9da646
 
 
4c7d562
 
 
 
d9da646
 
b2bc09b
4c7d562
3b89341
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from pathlib import Path
import os
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download

# Load environment variables from a local .env file into os.environ.
load_dotenv()
# Optional Hugging Face token used for the model download below;
# None when unset (the repo referenced below is public, so this may be absent).
HUGGINGFACE_HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")

app = FastAPI(title="Company Assistant API")

# Directory where the GGUF model file is kept between runs.
MODEL_DIR = Path("models/llama-3.2-1b")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Expected on-disk location of the quantized model file.
MODEL_PATH = MODEL_DIR / "llama-3.2-1b-instruct-q4_k_m.gguf"

# Download the model on first run only.
# NOTE: `local_dir` (not `cache_dir`) is used so the file is placed exactly
# at MODEL_PATH; with `cache_dir` the file lands in a HF cache subtree
# (models--*/snapshots/*), the exists() guard never becomes true, and the
# model would be re-downloaded on every startup.
if not MODEL_PATH.exists():
    print("Scaricando il modello GGUF Llama 3.2-1B...")
    MODEL_PATH = Path(
        hf_hub_download(
            repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
            filename="llama-3.2-1b-instruct-q4_k_m.gguf",
            local_dir=str(MODEL_DIR),
            token=HUGGINGFACE_HUB_TOKEN,
        )
    )
    print("Modello scaricato!")

# Load the model once at startup; the instance is reused across requests.
llm = Llama(model_path=str(MODEL_PATH))

# Request body schema for POST /chat.
class Message(BaseModel):
    # The user's prompt, forwarded verbatim to the model.
    text: str

@app.get("/")
def root():
    return {"message": "Company Assistant API è attiva!"}

@app.post("/chat")
def chat(message: Message):
    print(message)
    result = llm(prompt=message.text, max_tokens=200)
    print('risultato ', result)
    return {"reply": result}