Spaces:

atalaydenknalbant
/

Yolo11

Running on Zero

App Files Community

atalaydenknalbant committed on Jul 18, 2025

Commit

677ba31

verified ·

1 Parent(s): 6058ee6

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -7

app.py CHANGED Viewed

@@ -8,6 +8,42 @@ import tempfile
 @spaces.GPU
 def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
     if input_type == "Image":
         if image is None:
             width, height = 640, 480
@@ -23,7 +59,6 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
             draw.text((text_x, text_y), message, fill="black", font=font)
             return blank_image, None
-        model = YOLO(model_id)
         results = model.predict(
             source=image,
             conf=conf_threshold,
@@ -59,7 +94,6 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
             out.release()
             return None, temp_video_file
-        model = YOLO(model_id)
         cap = cv2.VideoCapture(video)
         fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
         frames = []
@@ -100,7 +134,17 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
 def update_visibility(input_type):
     """
-    Show/hide image/video input and output depending on input_type.
     """
     if input_type == "Image":
         # image, video, output_image, output_video
@@ -110,9 +154,20 @@ def update_visibility(input_type):
 def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
     """
-    This is called by gr.Examples. We force the radio to 'Image'
-    and then do a standard image inference, returning both updated radio
-    value and the annotated image.
     """
     annotated_image, _ = yolo_inference(
         input_type="Image",
@@ -190,4 +245,4 @@ with gr.Blocks() as app:
     )
 if __name__ == '__main__':
-    app.launch()

 @spaces.GPU
 def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
+    """
+    Performs object detection, instance segmentation, pose estimation,
+    oriented object detection, or classification using a YOLOv11 model
+    on either an image or a video.
+    This function loads the specified YOLOv11 model and applies it to the
+    provided input. For images, it returns an annotated image. For videos, it
+    processes each frame and returns an annotated video. It includes error
+    handling for missing inputs, returning blank outputs with informative messages.
+    Args:
+        input_type (str): Specifies the input type, either "Image" or "Video".
+        image (PIL.Image.Image or None): The input image if `input_type` is "Image".
+                                         None otherwise.
+        video (str or None): The path to the input video file if `input_type` is "Video".
+                             None otherwise.
+        model_id (str): The identifier of the YOLOv11 model to use
+                        (e.g., 'yolo11n.pt', 'yolo11s-seg.pt').
+        conf_threshold (float): The confidence threshold for object detection.
+                                Detections with lower confidence are discarded.
+        iou_threshold (float): The Intersection over Union (IoU) threshold for
+                               Non-Maximum Suppression (NMS). This is relevant
+                               for detection and segmentation tasks.
+        max_detection (int): The maximum number of detections to return per image or frame.
+    Returns:
+        tuple: A tuple containing two elements:
+            - PIL.Image.Image or None: The annotated image if `input_type` was "Image",
+                                       otherwise None.
+            - str or None: The path to the annotated video file if `input_type` was "Video",
+                           otherwise None.
+    """
+    # For YOLOv11, the model_id can directly be used by YOLO() as they are often
+    # pre-trained weights included with the Ultralytics package.
+    model = YOLO(model_id)
     if input_type == "Image":
         if image is None:
             width, height = 640, 480
             draw.text((text_x, text_y), message, fill="black", font=font)
             return blank_image, None
         results = model.predict(
             source=image,
             conf=conf_threshold,
             out.release()
             return None, temp_video_file
         cap = cv2.VideoCapture(video)
         fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
         frames = []
 def update_visibility(input_type):
     """
+    Adjusts the visibility of Gradio components based on the selected input type.
+    This function dynamically shows or hides the image and video input/output
+    components in the Gradio interface to ensure only relevant fields are visible.
+    Args:
+        input_type (str): The selected input type, either "Image" or "Video".
+    Returns:
+        tuple: A tuple of `gr.update` objects for the visibility of:
+               (image input, video input, image output, video output).
     """
     if input_type == "Image":
         # image, video, output_image, output_video
 def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
     """
+    Wrapper function for `yolo_inference` specifically for Gradio examples that use images.
+    This function simplifies the `yolo_inference` call for the `gr.Examples` component,
+    ensuring only image-based inference is performed for predefined examples.
+    Args:
+        image (PIL.Image.Image): The input image for the example.
+        model_id (str): The identifier of the YOLO model to use.
+        conf_threshold (float): The confidence threshold.
+        iou_threshold (float): The IoU threshold.
+        max_detection (int): The maximum number of detections.
+    Returns:
+        PIL.Image.Image or None: The annotated image. Returns None if no image is processed.
     """
     annotated_image, _ = yolo_inference(
         input_type="Image",
     )
 if __name__ == '__main__':
+    app.launch(mcp_server=True)