# Florence2 Image Annotator

This is a custom block designed to annotate images via text prompts using the [Florence2](https://huggingface.co/microsoft/Florence-2-large) model. The block uses the model to generate inpainting masks or bounding box annotations from a text prompt.

# How to use

```python
import torch
from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks
from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS
from diffusers.utils import load_image

# fetch the Florence2 image annotator block that will create our mask
image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True)

# start from the default set of inpainting blocks
my_blocks = INPAINT_BLOCKS.copy()

# insert the annotation block before the image encoding step
my_blocks.insert("image_annotator", image_annotator_block, 1)

# assemble the blocks into a sequential pipeline
blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks)

repo_id = "diffusers-internal-dev/modular-sdxl-inpainting"
pipe = blocks.init_pipeline(repo_id)
pipe.load_default_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True)

image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true")
image = image.resize((1024, 1024))

prompt = ["A red car"]
annotation_task = ""
annotation_prompt = ["the car"]

output = pipe(
    prompt=prompt,
    image=image,
    annotation_task=annotation_task,
    annotation_prompt=annotation_prompt,
    annotation_output_type="mask_image",
    num_inference_steps=35,
    guidance_scale=7.5,
    strength=0.95,
    output_type="pil",
)
```
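
Florence2 also understands explicit task prompts such as `<REFERRING_EXPRESSION_SEGMENTATION>` or `<CAPTION_TO_PHRASE_GROUNDING>`. Whether this block forwards them through `annotation_task` is an assumption here, so treat the snippet below as a minimal sketch and check the block's source for the values it actually accepts.

```python
# Minimal sketch (assumption): pass a Florence2 task token through `annotation_task`
# to control how the annotation is produced. `<REFERRING_EXPRESSION_SEGMENTATION>`
# is a standard Florence2 task prompt, but the block's accepted values may differ.
output = pipe(
    prompt=prompt,
    image=image,
    annotation_task="<REFERRING_EXPRESSION_SEGMENTATION>",
    annotation_prompt=["the car"],
    annotation_output_type="mask_image",
    num_inference_steps=35,
    guidance_scale=7.5,
    strength=0.95,
    output_type="pil",
)
```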