- tiling_ocr.py: split large drawings into overlapping tiles for better small-text recognition - preprocess_for_ocr.py: CLAHE + unsharp mask for enhancing blueprint contrast - visualize_dimensions.py: draw bounding boxes around detected dimension numbers - compare_ocr.py: side-by-side visualization of normal vs tiling OCR results - dimension_extractor.py: line-based dimension detection with pixel verification - ocr_qwen.py: Alibaba Cloud qwen-vl-ocr client with resize and regex fallback parser - test_qwen_ocr.py: standalone test for qwen OCR - process_any_pdf.py: add --use-tiling flag to switch between normal and tiling OCR
52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
Preprocess a PNG to improve OCR of dimension numbers.

Pipeline, in the order it is applied:
1. Scaling: resize by the ``scale`` factor (2.0 by default).
2. CLAHE: local contrast enhancement.
3. Unsharp mask: sharpening.
4. Normalization: min-max stretch to the 0..255 range.

Inversion (optional for some OCR engines) is not performed by this module.
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
def preprocess_for_ocr(img_path: Path, out_path: Path, scale: float = 2.0) -> None:
    """Enhance a drawing image for OCR and save the result.

    The image is read as grayscale, resized by ``scale`` (unless it is 1.0),
    then passed through CLAHE, an unsharp mask and min-max normalization
    before being written to ``out_path``.

    Args:
        img_path: Image file to read.
        out_path: Destination path handed to ``cv2.imwrite``.
        scale: Resize factor applied to both dimensions; must be positive.
            ``1.0`` skips the resize step.

    Raises:
        ValueError: If ``scale`` is not a positive number.
        RuntimeError: If the image cannot be loaded or the result cannot be
            written.
    """
    # Reject bad factors up front: cv2.resize would otherwise fail with an
    # opaque error on a zero or negative target size. Written as
    # ``not scale > 0`` so that NaN is rejected as well.
    if not scale > 0:
        raise ValueError(f"scale must be positive, got {scale!r}")

    img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError(f"Cannot load {img_path}")

    # Scaling
    if scale != 1.0:
        h, w = img.shape
        # Clamp to 1 px so a small factor on a tiny image cannot truncate
        # to an empty target size.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = cv2.resize(img, new_size, interpolation=cv2.INTER_CUBIC)

    # CLAHE (local contrast)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img = clahe.apply(img)

    # Unsharp mask: 1.5 * image - 0.5 * blurred copy. A (0, 0) kernel size
    # lets OpenCV derive the kernel from sigma = 3.
    blurred = cv2.GaussianBlur(img, (0, 0), 3)
    img = cv2.addWeighted(img, 1.5, blurred, -0.5, 0)

    # Normalization: stretch the result to the full 0..255 range.
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX)

    # cv2.imwrite reports failure through its boolean return value, so check
    # it rather than announcing success for a file that was never written.
    if not cv2.imwrite(str(out_path), img):
        raise RuntimeError(f"Cannot write {out_path}")
    print(f"[OK] Предобработка сохранена: {out_path}")
|
|
|
|
|
|
if __name__ == "__main__":
    # The first positional argument is the PNG to enhance; extras are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python preprocess_for_ocr.py <png>")
        sys.exit(1)

    source_png = Path(cli_args[0])
    # The result is written next to the input, with "_preproc" appended to
    # the file stem.
    target_png = source_png.parent.joinpath(f"{source_png.stem}_preproc.png")
    preprocess_for_ocr(source_png, target_png)
|