| | import gradio as gr |
| | import PyPDF2 |
| | import re |
| |
|
def read_pdf(file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file: The upload handed over by the UI. Either an object exposing
            the on-disk path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.

    Returns:
        The text of all pages, with runs of newlines inside a page
        collapsed to a single newline, each page stripped of surrounding
        whitespace, and pages separated by a blank line. An empty string
        is returned when no file was provided.
    """
    # A missing upload arrives as None; return no text instead of failing
    # on the attribute access below.
    if file is None:
        return ""

    # Accept both a file wrapper (path stored in ``.name``) and a bare path.
    path = getattr(file, "name", file)

    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Pages are read lazily from the open handle, so the extraction
        # must happen before the ``with`` block closes the file.
        pages = [
            # ``or ""`` is defensive in case a page yields no text object.
            re.sub(r"\n+", "\n", page.extract_text() or "").strip()
            for page in reader.pages
        ]

    return "\n\n".join(pages)
| |
|
# Build the web UI: one PDF upload in, the extracted text out.
# Components come from the top-level ``gr`` namespace because the
# ``gr.inputs`` / ``gr.outputs`` aliases are deprecated in Gradio 3.x and
# no longer exist in Gradio 4.x.
iface = gr.Interface(
    fn=read_pdf,
    inputs=gr.File(label="Upload a PDF file"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="PDF Text Extractor",
    description="A smooth app that gets text from PDF files🧠",
    theme="ParityError/Anime",  # theme referenced by name
)
iface.launch()
| |
|