Spaces: Runtime error
Upload app.py
app.py CHANGED
@@ -7,7 +7,7 @@ def install(package):
 install("evaluate")
 install("jiwer")
 install("huggingface_hub")
-install("gradio
+install("gradio")
 install("bitsandbytes")
 install("git+https://github.com/huggingface/transformers.git")
 install("git+https://github.com/huggingface/peft.git")
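The hunk header references the file's `install` helper, whose definition (above line 7) is not part of this diff. For context, a helper with this call shape is usually a thin pip wrapper; the sketch below is an assumption about that shape, not the actual body in app.py:

import subprocess
import sys

def install(package: str) -> None:
    # Install `package` into the environment of the running interpreter;
    # accepts anything pip accepts, e.g. "jiwer" or a git+https URL.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

As rendered, the removed line is missing the closing quote and parenthesis of install("gradio"), which is a SyntaxError before any install runs; the replacement line restores them.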
@@ -17,8 +17,6 @@ install("safetensors")
 install("torch")
 install("xformers")
 install("datasets")
-install("stable-diffusion")
-install("accelerate")
 
 from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
@@ -132,7 +130,6 @@ from peft import (
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
 from peft import LoraConfig, get_peft_model
 
-
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 class Social_Media_Captioner:
@@ -157,52 +154,58 @@
 
 
     def _load_model(self):
-
-
-
-
-
-
-
-            self.MODEL_NAME,
-            device_map = "auto",
-            trust_remote_code = True,
-            quantization_config = self.bnb_config
-        )
-
-        # Defining the tokenizers
-        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
-        self.tokenizer.pad_token = self.tokenizer.eos_token
-
-        if self.use_finetuned:
-            # LORA Config Model
-            self.lora_config = LoraConfig(
-                r=16,
-                lora_alpha=32,
-                target_modules=["query_key_value"],
-                lora_dropout=0.05,
-                bias="none",
-                task_type="CAUSAL_LM"
-            )
-            self.model = get_peft_model(self.model, self.lora_config)
-
-            # Fitting the adapters
-            self.peft_config = PeftConfig.from_pretrained(self.peft_model_name)
+        try:
+            self.bnb_config = BitsAndBytesConfig(
+                load_in_4bit = True,
+                bnb_4bit_use_double_quant = True,
+                bnb_4bit_quant_type= "nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16,
+            )
             self.model = AutoModelForCausalLM.from_pretrained(
-                self.
-
-
-
-                trust_remote_code = True
+                self.MODEL_NAME,
+                device_map = "auto",
+                trust_remote_code = True,
+                quantization_config = self.bnb_config
             )
-            self.model = PeftModel.from_pretrained(self.model, self.peft_model_name)
 
             # Defining the tokenizers
-            self.tokenizer = AutoTokenizer.from_pretrained(self.
+            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
-
-
+            if self.use_finetuned:
+                # LORA Config Model
+                self.lora_config = LoraConfig(
+                    r=16,
+                    lora_alpha=32,
+                    target_modules=["query_key_value"],
+                    lora_dropout=0.05,
+                    bias="none",
+                    task_type="CAUSAL_LM"
+                )
+                self.model = get_peft_model(self.model, self.lora_config)
+
+                # Fitting the adapters
+                self.peft_config = PeftConfig.from_pretrained(self.peft_model_name)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.peft_config.base_model_name_or_path,
+                    return_dict = True,
+                    quantization_config = self.bnb_config,
+                    device_map= "auto",
+                    trust_remote_code = True
+                )
+                self.model = PeftModel.from_pretrained(self.model, self.peft_model_name)
+
+                # Defining the tokenizers
+                self.tokenizer = AutoTokenizer.from_pretrained(self.peft_config.base_model_name_or_path)
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            self.model_loaded = True
+            print("Model Loaded successfully")
+
+        except Exception as e:
+            print(e)
+            self.model_loaded = False
+
 
     def inference(self, input_text: str, use_cached=True, cache_generation=True) -> str | None:
         if not self.model_loaded:
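The rewritten `_load_model` builds a 4-bit NF4 `BitsAndBytesConfig`, loads the base model with it, optionally attaches a PEFT adapter, and wraps the whole sequence in try/except so a failed load leaves `self.model_loaded = False` instead of crashing the Space. A minimal standalone sketch of the same load-then-attach pattern; the model and adapter names are placeholders, not values from this repo:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

BASE_MODEL = "tiiuae/falcon-7b"          # placeholder; target_modules=["query_key_value"] suggests a Falcon-style model
ADAPTER_REPO = "your-user/your-adapter"  # placeholder PEFT adapter repo

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store weights in 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # do matmuls in bf16
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(model, ADAPTER_REPO)  # attach the fine-tuned LoRA weights

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # causal LMs often ship without a pad token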
@@ -246,7 +249,7 @@ class Social_Media_Captioner:
             raise Exception("Enter a valid input text to generate a valid prompt")
 
         return f"""
-        Convert the given image description to
+        Convert the given image description to social media worthy metaphoric caption
         Description: {input_text}
         Caption:
         """.strip()
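For concreteness, this is what the completed prompt template evaluates to for a sample image description (the description string is illustrative, not from the repo):

input_text = "a golden retriever running along the shore at sunset"  # hypothetical example
prompt = f"""
Convert the given image description to social media worthy metaphoric caption
Description: {input_text}
Caption:
""".strip()
# prompt is now:
# Convert the given image description to social media worthy metaphoric caption
# Description: a golden retriever running along the shore at sunset
# Caption:

The LLM is then asked to continue the text after "Caption:".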
@@ -295,18 +298,20 @@ class Captions:
         image_description = self.image_to_text._generate_description(image, max_length=max_length_GIT)
         captions = self.LLM.inference(image_description, use_cached=use_cached_LLM, cache_generation=cache_generation_LLM)
         return captions
-
+
 caption_generator = Captions()
 
 import gradio as gr
 
 def setup(image):
+    # Assuming `caption_generator.generate_captions` is your function to generate captions.
+    # This is just a placeholder for your actual caption generation logic.
     return caption_generator.generate_captions(image = image)
 
 iface = gr.Interface(
     fn=setup,
-    inputs=gr.
-    outputs=
+    inputs=gr.Image(type="pil", label="Upload Image"),  # Updated usage here
+    outputs="text"  # Simplified usage here
 )
 
 iface.launch()
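With type="pil", Gradio passes the uploaded image to `setup` as a `PIL.Image.Image`, which Hugging Face image processors accept directly, and outputs="text" renders the returned string in a plain textbox. A self-contained sketch of the fixed wiring, with a stub standing in for the captioner (the stub body is an assumption for illustration):

import gradio as gr
from PIL import Image

def setup(image: Image.Image) -> str:
    # Stub standing in for caption_generator.generate_captions(image=image).
    return f"(caption for a {image.width}x{image.height} image)"

iface = gr.Interface(
    fn=setup,
    inputs=gr.Image(type="pil", label="Upload Image"),  # hands fn a PIL image
    outputs="text",                                     # plain textbox for the caption
)

iface.launch()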