| """ |
| PaddleOCR Engine |
| |
| High-accuracy OCR using PaddleOCR. |
| Supports detection, recognition, and angle classification. |
| """ |
|
|
| import time |
| from typing import List, Optional, Tuple |
| import numpy as np |
| from loguru import logger |
|
|
| from .base import OCREngine, OCRConfig, OCRResult |
| from ..schemas.core import BoundingBox, OCRRegion |
|
|
| |
# PaddleOCR is an optional dependency: record whether it is importable so the
# engine can raise a clear error at initialization time instead of failing at
# module import.
try:
    from paddleocr import PaddleOCR
except ImportError:
    HAS_PADDLEOCR = False
    logger.warning(
        "PaddleOCR not installed. Install with: "
        "pip install paddleocr paddlepaddle-gpu (or paddlepaddle for CPU)"
    )
else:
    HAS_PADDLEOCR = True
|
|
|
|
class PaddleOCREngine(OCREngine):
    """
    OCR engine using PaddleOCR.

    Features:
    - High accuracy text detection and recognition
    - Multi-language support
    - GPU acceleration
    - Angle classification for rotated text

    The PaddleOCR model is created lazily: ``recognize`` calls
    ``initialize`` on first use.
    """

    # Maps the language codes accepted in ``config.languages`` to the ``lang``
    # value passed to the PaddleOCR constructor.
    LANGUAGE_MAP = {
        "en": "en",
        "ch": "ch",
        "chinese_cht": "chinese_cht",
        "fr": "french",
        "german": "german",
        "es": "es",
        "it": "it",
        "pt": "pt",
        "ru": "ru",
        "japan": "japan",
        "korean": "korean",
        "ar": "ar",
        "hi": "hi",
        "latin": "latin",
    }

    def __init__(self, config: Optional[OCRConfig] = None):
        """
        Initialize PaddleOCR engine.

        Args:
            config: Engine configuration, forwarded to the base class.
                The model itself is not loaded here; see ``initialize``.
        """
        super().__init__(config)
        self._ocr: Optional[PaddleOCR] = None

    def initialize(self):
        """
        Create the PaddleOCR model from the engine configuration.

        Only the first entry of ``config.languages`` is used (``"en"`` when
        the list is empty). A code that is missing from ``LANGUAGE_MAP``
        falls back to English and a warning is logged. Calling this method
        again after a successful initialization is a no-op.

        Raises:
            RuntimeError: If the ``paddleocr`` package is not installed.
            Exception: Any error raised while constructing the model is
                logged and re-raised unchanged.
        """
        if not HAS_PADDLEOCR:
            # Same hint as the import-time warning, including the CPU wheel.
            raise RuntimeError(
                "PaddleOCR not installed. Install with: "
                "pip install paddleocr paddlepaddle-gpu (or paddlepaddle for CPU)"
            )

        if self._initialized:
            return

        logger.info("Initializing PaddleOCR engine...")

        lang = self.config.languages[0] if self.config.languages else "en"
        paddle_lang = self.LANGUAGE_MAP.get(lang)
        if paddle_lang is None:
            # Keep the English fallback, but make the misconfiguration visible
            # instead of silently recognizing with the wrong model.
            logger.warning(
                f"Language '{lang}' is not supported by PaddleOCREngine; "
                "falling back to 'en'"
            )
            paddle_lang = "en"

        try:
            self._ocr = PaddleOCR(
                use_angle_cls=self.config.use_angle_cls,
                lang=paddle_lang,
                use_gpu=self.config.use_gpu,
                # NOTE(review): presumably the GPU memory budget in MB;
                # confirm against the PaddleOCR documentation.
                gpu_mem=500,
                det_db_thresh=self.config.det_db_thresh,
                det_db_box_thresh=self.config.det_db_box_thresh,
                rec_batch_num=self.config.rec_batch_num,
                drop_score=self.config.drop_score,
                show_log=False,
            )
            self._initialized = True
            logger.info(f"PaddleOCR initialized (lang={paddle_lang}, gpu={self.config.use_gpu})")
        except Exception as e:
            logger.error(f"Failed to initialize PaddleOCR: {e}")
            raise

    def recognize(
        self,
        image: np.ndarray,
        page_number: int = 0,
    ) -> OCRResult:
        """
        Perform OCR on an image using PaddleOCR.

        Lines whose confidence is below ``config.min_confidence`` are
        dropped. Errors raised during recognition are logged with their
        traceback and reported through a failed ``OCRResult`` rather than
        propagated; errors from the lazy ``initialize`` call do propagate.

        Args:
            image: Image as numpy array (RGB, HWC format).
                NOTE(review): PaddleOCR's own examples feed OpenCV-style
                (BGR) arrays; confirm whether a channel swap is needed.
            page_number: Page number for multi-page documents

        Returns:
            OCRResult with recognized text and regions. On failure,
            ``success`` is False and ``error`` holds the exception message.
        """
        if not self._initialized:
            self.initialize()

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()

        try:
            results = self._ocr.ocr(image, cls=self.config.use_angle_cls)

            regions = []
            all_texts = []
            total_confidence = 0.0

            # Only the first entry of the result is read (a single image is
            # passed in); it may be None or empty when nothing was detected.
            lines = results[0] if results and results[0] else []
            for idx, line in enumerate(lines):
                if line is None:
                    continue

                box_points = line[0]
                text, confidence = line[1]

                if confidence < self.config.min_confidence:
                    continue

                score = float(confidence)
                regions.append(
                    OCRRegion(
                        text=text,
                        confidence=score,
                        bbox=self._polygon_to_bbox(box_points, image.shape[:2]),
                        polygon=[(float(p[0]), float(p[1])) for p in box_points],
                        page=page_number,
                        # Index within the raw PaddleOCR output, so ids keep
                        # gaps where lines were filtered out.
                        line_id=idx,
                        engine="paddleocr",
                    )
                )
                all_texts.append(text)
                total_confidence += score

            return OCRResult(
                regions=regions,
                full_text="\n".join(all_texts),
                confidence_avg=total_confidence / len(regions) if regions else 0.0,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
                engine="paddleocr",
                success=True,
            )

        except Exception as e:
            # The exception is swallowed into the result, so record the full
            # traceback here; otherwise it would be lost.
            logger.exception(f"PaddleOCR recognition failed: {e}")
            return OCRResult(
                regions=[],
                full_text="",
                confidence_avg=0.0,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
                engine="paddleocr",
                success=False,
                error=str(e),
            )

    def _polygon_to_bbox(
        self,
        points: List[List[float]],
        image_shape: Tuple[int, int],
    ) -> BoundingBox:
        """
        Convert polygon points to an axis-aligned bounding box.

        Args:
            points: Polygon vertices as ``[x, y]`` pairs in pixel coordinates.
            image_shape: ``(height, width)`` of the image.

        Returns:
            Non-normalized BoundingBox enclosing all points, clamped to the
            image bounds.
        """
        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]

        height, width = image_shape

        return BoundingBox(
            x_min=max(0, min(x_coords)),
            y_min=max(0, min(y_coords)),
            x_max=min(width, max(x_coords)),
            y_max=min(height, max(y_coords)),
            normalized=False,
            page_width=width,
            page_height=height,
        )

    def get_supported_languages(self) -> List[str]:
        """Return list of supported language codes (the keys of ``LANGUAGE_MAP``)."""
        return list(self.LANGUAGE_MAP)

    def recognize_with_structure(
        self,
        image: np.ndarray,
        page_number: int = 0,
    ) -> Tuple[OCRResult, Optional[dict]]:
        """
        Perform OCR and return it alongside structure information.

        Structure analysis (tables, layout) is not implemented yet: this
        currently runs plain ``recognize`` and always returns ``None`` as
        the structure info.

        Args:
            image: Image as numpy array
            page_number: Page number

        Returns:
            Tuple of (OCRResult, structure_info), where structure_info is
            currently always None.
        """
        ocr_result = self.recognize(image, page_number)

        # Placeholder until structure analysis is wired in.
        structure_info = None

        return ocr_result, structure_info
|
|