testi123456789 committed on
Commit b8a9630 · verified · 1 Parent(s): 8282e74

Update app.py

Files changed (1)
  1. app.py +48 -18
app.py CHANGED
@@ -1,35 +1,65 @@
 import gradio as gr
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-# 1) Load your tokenizer
-tokenizer = AutoTokenizer.from_pretrained("testi123456789/elektromart")
+# 1) Load tokenizer and base model on CPU (or GPU if available)
+tokenizer = AutoTokenizer.from_pretrained("finnish-nlp/ahma-3b")
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
 
-# 2) Load the base model onto CPU (full-precision)
-base = AutoModelForCausalLM.from_pretrained("finnish-nlp/ahma-3b")
+base_model = AutoModelForCausalLM.from_pretrained(
+    "finnish-nlp/ahma-3b",
+    torch_dtype=torch.float32,
+    device_map={"": "cpu"}
+)
 
-# 3) Apply your LoRA adapter without any device dispatching
+# 2) Apply your fine-tuned LoRA adapter
 model = PeftModel.from_pretrained(
-    base,
-    "testi123456789/elektromart",
-    device_map=None
+    base_model,
+    "testi123456789/elektromart"
 )
 model.to("cpu")
 model.eval()
 
-# 4) Define the chat function
-def chat_fn(prompt):
+# 3) Instruction you fine-tuned on
+INSTRUCTION = "Vastaa asiakkaan kyselyyn ystävällisesti ElektroMartin asiakaspalveluna."
+
+def chat_fn(user_question: str, max_new_tokens: int = 100,
+            temperature: float = 0.7, repetition_penalty: float = 1.25) -> str:
+    # 4) Build the prompt exactly as during training
+    prompt = f"[INST] {INSTRUCTION}\n{user_question} [/INST]\n"
+
+    # 5) Tokenize & clean up
     inputs = tokenizer(prompt, return_tensors="pt")
     inputs.pop("token_type_ids", None)
     inputs = {k: v.to("cpu") for k, v in inputs.items()}
-    outputs = model.generate(**inputs, max_new_tokens=100)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+    # 6) Generate
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            repetition_penalty=repetition_penalty
+        )
 
-# 5) Launch Gradio
-gr.Interface(
+    # 7) Decode only the newly generated part
+    generated = outputs[0][inputs["input_ids"].shape[-1]:]
+    answer = tokenizer.decode(generated, skip_special_tokens=True)
+    return answer.strip()
+
+# 8) Expose Gradio interface
+iface = gr.Interface(
     fn=chat_fn,
-    inputs=gr.Textbox(placeholder="Kysy jotain…"),
-    outputs="text",
-    title="ElektroMart Chatbot"
-).launch()
+    inputs=[
+        gr.Textbox(label="Kysy jotain…", placeholder="Kirjoita kysymyksesi tähän"),
+    ],
+    outputs=gr.Textbox(label="Vastaus"),
+    title="ElektroMartin Chatbotti"
+)
+
+if __name__ == "__main__":
+    iface.launch()
+
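Two notes on the updated chat_fn, for anyone reviewing this change.

First, the [INST] … [/INST] wrapper means that for a question such as "Onko teillä ilmainen toimitus?" (a hypothetical example, not from this commit), the model actually sees the instruction (roughly: "Answer the customer's query in a friendly manner as ElektroMart customer service") followed by the question:

[INST] Vastaa asiakkaan kyselyyn ystävällisesti ElektroMartin asiakaspalveluna.
Onko teillä ilmainen toimitus? [/INST]

Second, temperature is accepted in the chat_fn signature but never forwarded to model.generate, so sampling runs at the transformers default (1.0) whatever value is passed in. A minimal follow-up sketch, not part of this commit, would forward it alongside the other sampling arguments:

# Hypothetical follow-up patch: forward temperature so the parameter takes effect.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,  # previously accepted but unused
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        repetition_penalty=repetition_penalty
    )

Relatedly, since the Gradio interface wires up only the question textbox, max_new_tokens, temperature, and repetition_penalty always keep their defaults; exposing them as gr.Slider inputs would be a natural follow-up if tuning from the UI is wanted.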