| # Florence2 Image Annotator | |
| This is a custom block designed to annotate images via text prompts using the [Florence2](https://huggingface.co/microsoft/Florence-2-large) model. The model can be used as a processor to generate inpainting masks or bounding box annotations. | |
| # How to use | |
| ```python | |
| import torch | |
| from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks | |
| from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS | |
| from diffusers.utils import load_image | |
| # fetch the Florence2 image annotator block that will create our mask | |
| image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True) | |
| my_blocks = INPAINT_BLOCKS.copy() | |
| # insert the annotation block before the image encoding step | |
| my_blocks.insert("image_annotator", image_annotator_block, 1) | |
| # build the sequential pipeline blocks from the modified block dictionary | |
| blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) | |
| repo_id = "diffusers-internal-dev/modular-sdxl-inpainting" | |
| pipe = blocks.init_pipeline(repo_id) | |
| pipe.load_default_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True) | |
| image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true") | |
| image = image.resize((1024, 1024)) | |
| prompt = ["A red car"] | |
| annotation_task = "<REFERRING_EXPRESSION_SEGMENTATION>" | |
| annotation_prompt = ["the car"] | |
| output = pipe( | |
| prompt=prompt, | |
| image=image, | |
| annotation_task=annotation_task, | |
| annotation_prompt=annotation_prompt, | |
| annotation_output_type="mask_image", | |
| num_inference_steps=35, | |
| guidance_scale=7.5, | |
| strength=0.95, | |
| output_type="pil", | |
| ) | |
| ``` | |