Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,42 @@ import tempfile
|
|
| 8 |
|
| 9 |
@spaces.GPU
|
| 10 |
def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
if input_type == "Image":
|
| 12 |
if image is None:
|
| 13 |
width, height = 640, 480
|
|
@@ -23,7 +59,6 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
|
|
| 23 |
draw.text((text_x, text_y), message, fill="black", font=font)
|
| 24 |
return blank_image, None
|
| 25 |
|
| 26 |
-
model = YOLO(model_id)
|
| 27 |
results = model.predict(
|
| 28 |
source=image,
|
| 29 |
conf=conf_threshold,
|
|
@@ -59,7 +94,6 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
|
|
| 59 |
out.release()
|
| 60 |
return None, temp_video_file
|
| 61 |
|
| 62 |
-
model = YOLO(model_id)
|
| 63 |
cap = cv2.VideoCapture(video)
|
| 64 |
fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
|
| 65 |
frames = []
|
|
@@ -100,7 +134,17 @@ def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_thres
|
|
| 100 |
|
| 101 |
def update_visibility(input_type):
|
| 102 |
"""
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
"""
|
| 105 |
if input_type == "Image":
|
| 106 |
# image, video, output_image, output_video
|
|
@@ -110,9 +154,20 @@ def update_visibility(input_type):
|
|
| 110 |
|
| 111 |
def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
|
| 112 |
"""
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
"""
|
| 117 |
annotated_image, _ = yolo_inference(
|
| 118 |
input_type="Image",
|
|
@@ -190,4 +245,4 @@ with gr.Blocks() as app:
|
|
| 190 |
)
|
| 191 |
|
| 192 |
if __name__ == '__main__':
|
| 193 |
-
app.launch()
|
|
|
|
| 8 |
|
| 9 |
@spaces.GPU
|
| 10 |
def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
|
| 11 |
+
"""
|
| 12 |
+
Performs object detection, instance segmentation, pose estimation,
|
| 13 |
+
oriented object detection, or classification using a YOLOv11 model
|
| 14 |
+
on either an image or a video.
|
| 15 |
+
|
| 16 |
+
This function loads the specified YOLOv11 model and applies it to the
|
| 17 |
+
provided input. For images, it returns an annotated image. For videos, it
|
| 18 |
+
processes each frame and returns an annotated video. It includes error
|
| 19 |
+
handling for missing inputs, returning blank outputs with informative messages.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
input_type (str): Specifies the input type, either "Image" or "Video".
|
| 23 |
+
image (PIL.Image.Image or None): The input image if `input_type` is "Image".
|
| 24 |
+
None otherwise.
|
| 25 |
+
video (str or None): The path to the input video file if `input_type` is "Video".
|
| 26 |
+
None otherwise.
|
| 27 |
+
model_id (str): The identifier of the YOLOv11 model to use
|
| 28 |
+
(e.g., 'yolo11n.pt', 'yolo11s-seg.pt').
|
| 29 |
+
conf_threshold (float): The confidence threshold for object detection.
|
| 30 |
+
Detections with lower confidence are discarded.
|
| 31 |
+
iou_threshold (float): The Intersection over Union (IoU) threshold for
|
| 32 |
+
Non-Maximum Suppression (NMS). This is relevant
|
| 33 |
+
for detection and segmentation tasks.
|
| 34 |
+
max_detection (int): The maximum number of detections to return per image or frame.
|
| 35 |
+
|
| 36 |
+
Returns:
|
| 37 |
+
tuple: A tuple containing two elements:
|
| 38 |
+
- PIL.Image.Image or None: The annotated image if `input_type` was "Image",
|
| 39 |
+
otherwise None.
|
| 40 |
+
- str or None: The path to the annotated video file if `input_type` was "Video",
|
| 41 |
+
otherwise None.
|
| 42 |
+
"""
|
| 43 |
+
# For YOLOv11, model_id can be passed directly to YOLO(): official weight names
|
| 44 |
+
# are resolved by Ultralytics, which downloads the pre-trained weights if they are not available locally.
|
| 45 |
+
model = YOLO(model_id)
|
| 46 |
+
|
| 47 |
if input_type == "Image":
|
| 48 |
if image is None:
|
| 49 |
width, height = 640, 480
|
|
|
|
| 59 |
draw.text((text_x, text_y), message, fill="black", font=font)
|
| 60 |
return blank_image, None
|
| 61 |
|
|
|
|
| 62 |
results = model.predict(
|
| 63 |
source=image,
|
| 64 |
conf=conf_threshold,
|
|
|
|
| 94 |
out.release()
|
| 95 |
return None, temp_video_file
|
| 96 |
|
|
|
|
| 97 |
cap = cv2.VideoCapture(video)
|
| 98 |
fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
|
| 99 |
frames = []
|
|
|
|
| 134 |
|
| 135 |
def update_visibility(input_type):
|
| 136 |
"""
|
| 137 |
+
Adjusts the visibility of Gradio components based on the selected input type.
|
| 138 |
+
|
| 139 |
+
This function dynamically shows or hides the image and video input/output
|
| 140 |
+
components in the Gradio interface to ensure only relevant fields are visible.
|
| 141 |
+
|
| 142 |
+
Args:
|
| 143 |
+
input_type (str): The selected input type, either "Image" or "Video".
|
| 144 |
+
|
| 145 |
+
Returns:
|
| 146 |
+
tuple: A tuple of `gr.update` objects for the visibility of:
|
| 147 |
+
(image input, video input, image output, video output).
|
| 148 |
"""
|
| 149 |
if input_type == "Image":
|
| 150 |
# image, video, output_image, output_video
|
|
|
|
| 154 |
|
| 155 |
def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
|
| 156 |
"""
|
| 157 |
+
Wrapper function for `yolo_inference` specifically for Gradio examples that use images.
|
| 158 |
+
|
| 159 |
+
This function simplifies the `yolo_inference` call for the `gr.Examples` component,
|
| 160 |
+
ensuring only image-based inference is performed for predefined examples.
|
| 161 |
+
|
| 162 |
+
Args:
|
| 163 |
+
image (PIL.Image.Image): The input image for the example.
|
| 164 |
+
model_id (str): The identifier of the YOLO model to use.
|
| 165 |
+
conf_threshold (float): The confidence threshold.
|
| 166 |
+
iou_threshold (float): The IoU threshold.
|
| 167 |
+
max_detection (int): The maximum number of detections.
|
| 168 |
+
|
| 169 |
+
Returns:
|
| 170 |
+
PIL.Image.Image or None: The annotated image. Returns None if no image is processed.
|
| 171 |
"""
|
| 172 |
annotated_image, _ = yolo_inference(
|
| 173 |
input_type="Image",
|
|
|
|
| 245 |
)
|
| 246 |
|
| 247 |
if __name__ == '__main__':
|
| 248 |
+
app.launch(mcp_server=True)
|