# Previous version: full StarCoder loaded in float16 (kept for reference).
# import os
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
#
# MODEL_NAME = "bigcode/starcoder"
#
# # Ensure the token is provided
# HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
# if not HF_TOKEN:
#     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")
#
# # Set device
# device = "cuda" if torch.cuda.is_available() else "cpu"
#
# # Load tokenizer with authentication
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
#
# # Load model with optimizations
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     token=HF_TOKEN,
#     torch_dtype=torch.float16,   # Reduce memory usage
#     low_cpu_mem_usage=True,      # Optimize loading
#     device_map="auto",           # Automatic device placement
#     offload_folder="offload"     # Offload to disk if needed
# )  # No .to(device) here: device_map="auto" already places the model, and moving a dispatched model raises an error
#
# def generate_code(prompt: str, max_tokens: int = 256):
#     """Generates code based on the input prompt."""
#     if not prompt.strip():
#         return "Error: Empty prompt provided."
#     inputs = tokenizer(prompt, return_tensors="pt").to(device)
#     output = model.generate(**inputs, max_new_tokens=max_tokens)
#     return tokenizer.decode(output[0], skip_special_tokens=True)
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "bigcode/starcoderbase-1b"  # Lighter 1B-parameter StarCoder variant

# Ensure the token is provided
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")

# 4-bit NF4 quantization to reduce GPU memory usage
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load tokenizer and quantized model with authentication
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    quantization_config=quant_config,
    device_map="auto",       # Automatic device placement
    trust_remote_code=True
)

def generate_code(prompt: str, max_tokens: int = 256):
    """Generates code based on the input prompt."""
    if not prompt.strip():
        return "Error: Empty prompt provided."
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)
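
# Minimal usage sketch (illustrative, not part of the original file): the sample
# prompt and the __main__ guard below are assumptions about how the script might
# be exercised locally to smoke-test generate_code.
if __name__ == "__main__":
    sample_prompt = "def fibonacci(n):"
    print(generate_code(sample_prompt, max_tokens=64))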