File size: 287 Bytes
1108401 |
1 2 3 4 5 6 7 8 9 10 11 |
"""PDF text extraction using PyMuPDF (fitz) for embedded text layers."""
import fitz
def extract_text_from_pdf(pdf_path) -> str:
    """Return the embedded text of every page in a PDF as one string.

    Each page is read with ``page.get_text('text')``; the per-page strings
    are joined in page order, separated by a blank line (``'\\n\\n'``).
    A page that yields no text contributes an empty string, so the
    separators still mark its position.

    Args:
        pdf_path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        The concatenated page texts; ``''`` for a document with no pages.

    Any exception raised by ``fitz.open`` or ``get_text`` propagates to
    the caller unchanged.
    """
    # Open via the context manager so the document is closed even if text
    # extraction fails midway; previously the handle was never released.
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text('text') or '' for page in doc]
    return '\n\n'.join(page_texts)
|