Synced repo using 'sync_with_huggingface' GitHub Action
Browse files - gradio_app.py +50 -13
gradio_app.py
CHANGED
|
@@ -230,6 +230,7 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 230 |
in_num = gr.Slider(label="Page number", minimum=1, maximum=100, value=1, step=1)
|
| 231 |
in_img = gr.Image(label="Select page of Image", type="pil", sources=None)
|
| 232 |
|
|
|
|
| 233 |
text_det_btn = gr.Button("Run Text Detection")
|
| 234 |
layout_det_btn = gr.Button("Run Layout Analysis")
|
| 235 |
|
|
@@ -240,12 +241,28 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 240 |
|
| 241 |
skip_table_detection_ckb = gr.Checkbox(label="Skip table detection", value=False, info="Table recognition only: Skip table detection and treat the whole image/page as a table.")
|
| 242 |
table_rec_btn = gr.Button("Run Table Rec")
|
| 243 |
-
|
| 244 |
-
ocr_errors_btn = gr.Button("Run bad PDF text detection")
|
| 245 |
with gr.Column():
|
| 246 |
-
result_img = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
result_json = gr.JSON(label="Result json")
|
| 248 |
-
ocr_boxes_img = gr.Image(label="OCR boxes image")
|
| 249 |
|
| 250 |
def show_image(file, num=1):
|
| 251 |
if file.endswith('.pdf'):
|
|
@@ -273,8 +290,12 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 273 |
|
| 274 |
# Run Text Detection
|
| 275 |
def text_det_img(pil_image):
|
| 276 |
-
det_img,
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
text_det_btn.click(
|
| 279 |
fn=text_det_img,
|
| 280 |
inputs=[in_img],
|
|
@@ -284,7 +305,11 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 284 |
# Run layout
|
| 285 |
def layout_det_img(pil_image):
|
| 286 |
layout_img, pred = layout_detection(pil_image)
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
layout_det_btn.click(
|
| 289 |
fn=layout_det_img,
|
| 290 |
inputs=[in_img],
|
|
@@ -304,11 +329,16 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 304 |
recognize_math,
|
| 305 |
with_bboxes=ocr_with_boxes,
|
| 306 |
)
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
text_rec_btn.click(
|
| 309 |
fn=text_rec_img,
|
| 310 |
inputs=[in_img, in_file, in_num, skip_text_detection_ckb, recognize_math_ckb, ocr_with_boxes_ckb],
|
| 311 |
-
outputs=[result_img, result_json
|
| 312 |
)
|
| 313 |
|
| 314 |
# Run Table Recognition
|
|
@@ -318,7 +348,11 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 318 |
else:
|
| 319 |
pil_image_highres = pil_image
|
| 320 |
table_img, pred = table_recognition(pil_image, pil_image_highres, skip_table_detection)
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
table_rec_btn.click(
|
| 323 |
fn=table_rec_img,
|
| 324 |
inputs=[in_img, in_file, in_num, skip_table_detection_ckb],
|
|
@@ -331,12 +365,15 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 331 |
raise gr.Error("This feature only works with PDFs.", duration=5)
|
| 332 |
page_count = page_counter(in_file)
|
| 333 |
io_file = io.BytesIO(open(in_file.name, "rb").read())
|
| 334 |
-
|
| 335 |
-
return
|
|
|
|
|
|
|
|
|
|
| 336 |
ocr_errors_btn.click(
|
| 337 |
fn=ocr_errors_pdf,
|
| 338 |
inputs=[in_file],
|
| 339 |
-
outputs=[result_json]
|
| 340 |
)
|
| 341 |
|
| 342 |
if __name__ == "__main__":
|
|
|
|
| 230 |
in_num = gr.Slider(label="Page number", minimum=1, maximum=100, value=1, step=1)
|
| 231 |
in_img = gr.Image(label="Select page of Image", type="pil", sources=None)
|
| 232 |
|
| 233 |
+
ocr_errors_btn = gr.Button("Run bad PDF text detection")
|
| 234 |
text_det_btn = gr.Button("Run Text Detection")
|
| 235 |
layout_det_btn = gr.Button("Run Layout Analysis")
|
| 236 |
|
|
|
|
| 241 |
|
| 242 |
skip_table_detection_ckb = gr.Checkbox(label="Skip table detection", value=False, info="Table recognition only: Skip table detection and treat the whole image/page as a table.")
|
| 243 |
table_rec_btn = gr.Button("Run Table Rec")
|
|
|
|
|
|
|
| 244 |
with gr.Column():
|
| 245 |
+
result_img = gr.Gallery(label="Result images", show_label=True,
|
| 246 |
+
elem_id="gallery", columns=[1], rows=[2], object_fit="contain", height="auto")
|
| 247 |
+
|
| 248 |
+
gr.HTML("""
|
| 249 |
+
<style>
|
| 250 |
+
#gallery {
|
| 251 |
+
height: auto !important;
|
| 252 |
+
max-height: none !important;
|
| 253 |
+
overflow: visible !important;
|
| 254 |
+
}
|
| 255 |
+
#gallery .gallery-item {
|
| 256 |
+
flex-direction: column !important;
|
| 257 |
+
}
|
| 258 |
+
#gallery .gallery-item img {
|
| 259 |
+
width: 100% !important;
|
| 260 |
+
height: auto !important;
|
| 261 |
+
object-fit: contain !important;
|
| 262 |
+
}
|
| 263 |
+
</style>
|
| 264 |
+
""")
|
| 265 |
result_json = gr.JSON(label="Result json")
|
|
|
|
| 266 |
|
| 267 |
def show_image(file, num=1):
|
| 268 |
if file.endswith('.pdf'):
|
|
|
|
| 290 |
|
| 291 |
# Run Text Detection
|
| 292 |
def text_det_img(pil_image):
|
| 293 |
+
det_img, pred = text_detection(pil_image)
|
| 294 |
+
det_json = pred.model_dump(exclude=["heatmap", "affinity_map"])
|
| 295 |
+
return (
|
| 296 |
+
gr.update(label="Result image: text detected", value=[det_img], rows=[1], height=det_img.height),
|
| 297 |
+
gr.update(label="Result json: " + str(len(det_json['bboxes'])) + " text boxes detected", value=det_json)
|
| 298 |
+
)
|
| 299 |
text_det_btn.click(
|
| 300 |
fn=text_det_img,
|
| 301 |
inputs=[in_img],
|
|
|
|
| 305 |
# Run layout
|
| 306 |
def layout_det_img(pil_image):
|
| 307 |
layout_img, pred = layout_detection(pil_image)
|
| 308 |
+
layout_json = pred.model_dump(exclude=["segmentation_map"])
|
| 309 |
+
return (
|
| 310 |
+
gr.update(label="Result image: layout detected", value=[layout_img], rows=[1], height=layout_img.height),
|
| 311 |
+
gr.update(label="Result json: " + str(len(layout_json['bboxes'])) + " layout labels detected", value=layout_json)
|
| 312 |
+
)
|
| 313 |
layout_det_btn.click(
|
| 314 |
fn=layout_det_img,
|
| 315 |
inputs=[in_img],
|
|
|
|
| 329 |
recognize_math,
|
| 330 |
with_bboxes=ocr_with_boxes,
|
| 331 |
)
|
| 332 |
+
text_img = [(rec_img, "Text"), (box_img, "Boxes")]
|
| 333 |
+
text_json = pred.model_dump()
|
| 334 |
+
return (
|
| 335 |
+
gr.update(label="Result image: text recognized", value=text_img, rows=[2], height=rec_img.height + box_img.height),
|
| 336 |
+
gr.update(label="Result json: " + str(len(text_json['text_lines'])) + " text lines recognized", value=text_json)
|
| 337 |
+
)
|
| 338 |
text_rec_btn.click(
|
| 339 |
fn=text_rec_img,
|
| 340 |
inputs=[in_img, in_file, in_num, skip_text_detection_ckb, recognize_math_ckb, ocr_with_boxes_ckb],
|
| 341 |
+
outputs=[result_img, result_json]
|
| 342 |
)
|
| 343 |
|
| 344 |
# Run Table Recognition
|
|
|
|
| 348 |
else:
|
| 349 |
pil_image_highres = pil_image
|
| 350 |
table_img, pred = table_recognition(pil_image, pil_image_highres, skip_table_detection)
|
| 351 |
+
table_json = [p.model_dump() for p in pred]
|
| 352 |
+
return (
|
| 353 |
+
gr.update(label="Result image: table recognized", value=[table_img], rows=[1], height=table_img.height),
|
| 354 |
+
gr.update(label="Result json: " + str(len(table_json)) + " table tree recognized", value=table_json)
|
| 355 |
+
)
|
| 356 |
table_rec_btn.click(
|
| 357 |
fn=table_rec_img,
|
| 358 |
inputs=[in_img, in_file, in_num, skip_table_detection_ckb],
|
|
|
|
| 365 |
raise gr.Error("This feature only works with PDFs.", duration=5)
|
| 366 |
page_count = page_counter(in_file)
|
| 367 |
io_file = io.BytesIO(open(in_file.name, "rb").read())
|
| 368 |
+
layout_label, layout_json = ocr_errors(io_file, page_count)
|
| 369 |
+
return (
|
| 370 |
+
gr.update(label="Result image: NONE", value=None),
|
| 371 |
+
gr.update(label="Result json: " + layout_label, value=layout_json)
|
| 372 |
+
)
|
| 373 |
ocr_errors_btn.click(
|
| 374 |
fn=ocr_errors_pdf,
|
| 375 |
inputs=[in_file],
|
| 376 |
+
outputs=[result_img, result_json]
|
| 377 |
)
|
| 378 |
|
| 379 |
if __name__ == "__main__":
|