Use this script to generate a title for an image in the style of Nandalal Bose:
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from huggingface_hub import login
# Optional: Login if your repo is Private. If Public, you can skip this.
# HF_TOKEN = "hf_..."
# login(token=HF_TOKEN)
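# Alternative: authenticate once from the shell instead of in code,
# assuming the CLI that ships with huggingface_hub is installed:
#   huggingface-cli login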
MODEL_REPO_ID = "Aarnb/Image_Title_as_Nandalal"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# LOAD MODEL & PROCESSOR
print(f"Loading model from {MODEL_REPO_ID}...")
# Load the processor (handles image resizing and normalization)
processor = BlipProcessor.from_pretrained(MODEL_REPO_ID)
# Load the model (the weights you trained)
model = BlipForConditionalGeneration.from_pretrained(MODEL_REPO_ID).to(device)
model.eval()  # Set to evaluation mode (disables dropout)
print("Model loaded successfully!")
# PREDICTION FUNCTION
def predict_description(image_path):
    """
    Takes an image path and returns the generated description.
    """
    try:
        raw_image = Image.open(image_path).convert('RGB')
        # Note: we do NOT pass text here, because we want the model to generate it.
        inputs = processor(images=raw_image, return_tensors="pt").to(device)
        # max_new_tokens controls how long the generated description can be
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            min_length=5,
            num_beams=5,  # beam search for better quality
            early_stopping=True,
        )
        description = processor.decode(outputs[0], skip_special_tokens=True)
        return description
    except Exception as e:
        return f"Error: {e}"
# Replace this with the path to the image you want to test
img_path = "img26.jpg"
print("-" * 30)
print(f"Analyzing: {img_path}")
result = predict_description(img_path)
print(f"Predicted Description: {result}")
print("-" * 30)