Synced repo using 'sync_with_huggingface' GitHub Action
Browse files- gradio_app.py +31 -6
gradio_app.py
CHANGED
|
@@ -63,11 +63,21 @@ def run_ocr_errors(pdf_file, page_count, sample_len=512, max_samples=10, max_pag
|
|
| 63 |
return label, results.labels
|
| 64 |
|
| 65 |
# just copy from streamlit_app.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def text_detection(img) -> (Image.Image, TextDetectionResult):
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
det_img = draw_polys_on_image(
|
| 70 |
-
return det_img,
|
| 71 |
|
| 72 |
# just copy from streamlit_app.py
|
| 73 |
def layout_detection(img) -> (Image.Image, LayoutResult):
|
|
@@ -178,6 +188,7 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 178 |
in_img = gr.Image(label="Select page of Image", type="pil", sources=None)
|
| 179 |
|
| 180 |
text_det_btn = gr.Button("Run Text Detection")
|
|
|
|
| 181 |
layout_det_btn = gr.Button("Run Layout Analysis")
|
| 182 |
|
| 183 |
lang_dd = gr.Dropdown(label="Languages", choices=sorted(list(CODE_TO_LANGUAGE.values())), multiselect=True, max_choices=4, info="Select the languages in the image (if known) to improve OCR accuracy. Optional.")
|
|
@@ -218,13 +229,27 @@ with gr.Blocks(title="Surya") as demo:
|
|
| 218 |
|
| 219 |
# Run Text Detection
|
| 220 |
def text_det_img(pil_image):
|
| 221 |
-
det_img,
|
| 222 |
-
return det_img,
|
| 223 |
text_det_btn.click(
|
| 224 |
fn=text_det_img,
|
| 225 |
inputs=[in_img],
|
| 226 |
outputs=[result_img, result_json]
|
| 227 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
# Run layout
|
| 229 |
def layout_det_img(pil_image):
|
| 230 |
layout_img, pred = layout_detection(pil_image)
|
|
|
|
| 63 |
return label, results.labels
|
| 64 |
|
| 65 |
# just copy from streamlit_app.py
|
| 66 |
+
def inline_detection(img) -> (Image.Image, TextDetectionResult, TextDetectionResult):
    """Detect inline math regions on a page image and outline them in blue.

    The text detector runs first; the bounding boxes it finds are then handed
    to the inline-detection predictor together with the image.

    Args:
        img: Page image to analyse. It must support ``.copy()``.

    Returns:
        A 3-tuple ``(det_img, text_pred, inline_pred)``: the annotated copy of
        the image, the text-detection result, and the inline-detection result.
    """
    # NOTE(review): the third annotation entry assumes the inline predictor
    # returns the same result type as the text detector (it exposes `.bboxes`
    # in the same way) - confirm against the predictor's signature.
    text_pred = predictors["detection"]([img])[0]
    text_boxes = [p.bbox for p in text_pred.bboxes]

    # Predictors take batches; a single-page batch is sent and its only
    # result is taken.
    inline_pred = predictors["inline_detection"]([img], [text_boxes], include_maps=True)[0]
    inline_polygons = [p.polygon for p in inline_pred.bboxes]

    # The drawing helper is given a copy of the input image.
    det_img = draw_polys_on_image(inline_polygons, img.copy(), color='blue')
    return det_img, text_pred, inline_pred
|
| 74 |
+
|
| 75 |
+
# Copied from streamlit_app.py. NOTE: text_detection must not return `inline_pred`; doing so fails with `name 'inline_pred' is not defined`.
|
| 76 |
def text_detection(img) -> (Image.Image, TextDetectionResult):
    """Run text detection on one page image and draw the detected polygons.

    Args:
        img: Page image to analyse. It must support ``.copy()``.

    Returns:
        A 2-tuple ``(det_img, text_pred)``: the annotated copy of the image and
        the text-detection result for that image.
    """
    detector = predictors["detection"]
    # The detector takes a batch; send one page and keep its only result.
    page_result = detector([img])[0]
    polygons = [box.polygon for box in page_result.bboxes]
    annotated = draw_polys_on_image(polygons, img.copy())
    # Only two values are returned: this function never computes an inline
    # prediction, so there is nothing else to hand back.
    return annotated, page_result
|
| 81 |
|
| 82 |
# just copy from streamlit_app.py
|
| 83 |
def layout_detection(img) -> (Image.Image, LayoutResult):
|
|
|
|
| 188 |
# Page image that every detection button below uses as its input.
in_img = gr.Image(label="Select page of Image", type="pil", sources=None)

# One button per analysis that can be run on the selected page.
text_det_btn = gr.Button("Run Text Detection")
inline_det_btn = gr.Button("Run Inline Math Detection")
layout_det_btn = gr.Button("Run Layout Analysis")

# Optional language hints; sorted() accepts the dict view directly, so no
# intermediate list copy is needed.
lang_dd = gr.Dropdown(
    label="Languages",
    choices=sorted(CODE_TO_LANGUAGE.values()),
    multiselect=True,
    max_choices=4,
    info="Select the languages in the image (if known) to improve OCR accuracy. Optional.",
)
|
|
|
|
| 229 |
|
| 230 |
# Run Text Detection
|
| 231 |
def text_det_img(pil_image):
    """Gradio handler for the "Run Text Detection" button.

    Args:
        pil_image: Current value of the ``in_img`` component; ``None`` when no
            page has been selected yet.

    Returns:
        A 2-tuple ``(det_img, result)``: the annotated image and the
        text-detection result dumped to a dict without its ``heatmap`` and
        ``affinity_map`` fields.

    Raises:
        gr.Error: If no image is available to run detection on.
    """
    # Fail with a readable message instead of an opaque predictor error when
    # the button is clicked before a page is loaded.
    if pil_image is None:
        raise gr.Error("Please select a page image before running text detection.")

    det_img, text_pred = text_detection(pil_image)
    return det_img, text_pred.model_dump(exclude=["heatmap", "affinity_map"])


# Show the annotated image and the JSON result in the shared output widgets.
text_det_btn.click(
    fn=text_det_img,
    inputs=[in_img],
    outputs=[result_img, result_json]
)
|
| 239 |
+
def inline_det_img(pil_image):
    """Gradio handler for the "Run Inline Math Detection" button.

    Args:
        pil_image: Current value of the ``in_img`` component; ``None`` when no
            page has been selected yet.

    Returns:
        A 2-tuple ``(det_img, payload)``: the annotated image and a dict with
        the keys ``"text"`` and ``"inline"``, each holding the corresponding
        prediction dumped without its ``heatmap`` and ``affinity_map`` fields.

    Raises:
        gr.Error: If no image is available to run detection on.
    """
    # Fail with a readable message instead of an opaque predictor error when
    # the button is clicked before a page is loaded.
    if pil_image is None:
        raise gr.Error("Please select a page image before running inline math detection.")

    det_img, text_pred, inline_pred = inline_detection(pil_image)
    # Named `payload` rather than `json` so the stdlib module name is not
    # shadowed inside the handler.
    payload = {
        "text": text_pred.model_dump(exclude=["heatmap", "affinity_map"]),
        "inline": inline_pred.model_dump(exclude=["heatmap", "affinity_map"]),
    }
    return det_img, payload


# Show the annotated image and the JSON result in the shared output widgets.
inline_det_btn.click(
    fn=inline_det_img,
    inputs=[in_img],
    outputs=[result_img, result_json]
)
|
| 251 |
+
|
| 252 |
+
|
| 253 |
# Run layout
|
| 254 |
def layout_det_img(pil_image):
|
| 255 |
layout_img, pred = layout_detection(pil_image)
|