docintel-ai-extractor / pdf_loader.py
hmnshudhmn24's picture
Upload 14 files
1108401 verified
"""PDF text extraction using PyMuPDF (fitz) for embedded text layers."""
import fitz
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, joined by blank lines.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        One string containing each page's text in document order, with
        consecutive pages separated by two newline characters. A page
        that yields no text contributes an empty string, and a document
        with no pages yields an empty string.

    Any error raised by ``fitz.open`` or ``page.get_text`` propagates to
    the caller unchanged.
    """
    # Open the document as a context manager so it is closed even when a
    # page fails to extract; previously the handle was never released.
    with fitz.open(pdf_path) as doc:
        # Keep the defensive `or ''` so a falsy result never reaches join().
        page_texts = [page.get_text('text') or '' for page in doc]
    return '\n\n'.join(page_texts)