import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1) Load tokenizer and base model on CPU
tokenizer = AutoTokenizer.from_pretrained("finnish-nlp/ahma-3b")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    "finnish-nlp/ahma-3b",
    torch_dtype=torch.float32,
    device_map={"": "cpu"}
)

# 2) Apply the fine-tuned LoRA adapter
model = PeftModel.from_pretrained(
    base_model,
    "testi123456789/elektromart"
)
model.to("cpu")
model.eval()

# 3) Instruction the adapter was fine-tuned on
INSTRUCTION = "Vastaa asiakkaan kyselyyn ystävällisesti ElektroMartin asiakaspalveluna."

def chat_fn(user_question: str, max_new_tokens: int = 100,
            temperature: float = 0.7, repetition_penalty: float = 1.25) -> str:
    # 4) Build the prompt exactly as during training
    prompt = f"[INST] {INSTRUCTION}\n{user_question} [/INST]\n"

    # 5) Tokenize and drop token_type_ids, which the model does not accept
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs.pop("token_type_ids", None)
    inputs = {k: v.to("cpu") for k, v in inputs.items()}

    # 6) Generate (temperature only takes effect because do_sample=True)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=temperature,
            repetition_penalty=repetition_penalty
        )

    # 7) Decode only the newly generated part
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(generated, skip_special_tokens=True)
    return answer.strip()
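
# Quick local sanity check (the question below is only an illustrative example);
# uncomment to test chat_fn without launching the Gradio UI:
# print(chat_fn("Milloin tilaukseni saapuu?"))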

# 8) Expose the Gradio interface
iface = gr.Interface(
    fn=chat_fn,
    inputs=[
        gr.Textbox(label="Kysy jotain…", placeholder="Kirjoita kysymyksesi tähän"),
    ],
    outputs=gr.Textbox(label="Vastaus"),
    title="ElektroMartin Chatbotti"
)

if __name__ == "__main__":
    iface.launch()
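
# Optional: once the Space is running, it can also be queried programmatically
# with gradio_client. This is a minimal sketch; the Space id below is an
# assumption (it may differ from the adapter repo "testi123456789/elektromart"),
# and "/predict" is the default endpoint name for a single-function gr.Interface.
#
# from gradio_client import Client
# client = Client("testi123456789/elektromart")  # hypothetical Space id
# print(client.predict("Onko teillä ilmainen toimitus?", api_name="/predict"))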