#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Tiling OCR for large drawings.

Cuts a PNG into overlapping crops, runs OCR on every crop and merges the
results with de-duplication.

Effect: each crop is effectively "zoomed in" for the OCR engine, so small
text occupies a larger share of the crop area.
"""

import sys
import json
import re
import tempfile
from pathlib import Path
from typing import List, Dict, Tuple

from PIL import Image
from rapidocr import RapidOCR


def _axis_offsets(length: int, tile_size: int, step: int) -> List[int]:
    """Return tile start offsets along one axis of the given length.

    Offsets advance by ``step`` and stop as soon as a tile reaches the far
    edge, so no tile is emitted that lies entirely inside the previous one.
    """
    if length <= 0:
        return []
    offsets = []
    pos = 0
    while True:
        offsets.append(pos)
        if pos + tile_size >= length:
            break
        pos += step
    return offsets


def make_tiles(img: Image.Image, tile_size: int = 2000,
               overlap: int = 200) -> List[Tuple[int, int, Image.Image]]:
    """
    Generate overlapping crops of an image.

    Tiles are produced row by row (top to bottom, left to right). Tiles at
    the right/bottom edge are clipped to the image bounds and may therefore
    be smaller than ``tile_size``.

    Args:
        img: Source image; only ``img.size`` and ``img.crop`` are used.
        tile_size: Maximum width and height of a tile, in pixels.
        overlap: Number of pixels shared by neighbouring tiles.

    Returns:
        [(offset_x, offset_y, cropped_image), ...] where the offsets are the
        top-left corner of the crop in source-image coordinates.

    Raises:
        ValueError: If ``tile_size`` is not positive or ``overlap`` is not
            in the range ``0 <= overlap < tile_size``.
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")
    if not 0 <= overlap < tile_size:
        # A step of zero or less would never advance across the image, and
        # a negative overlap would leave uncovered gaps between tiles.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < tile_size, "
            f"got overlap={overlap}, tile_size={tile_size}"
        )

    w, h = img.size
    step = tile_size - overlap
    xs = _axis_offsets(w, tile_size, step)
    ys = _axis_offsets(h, tile_size, step)

    tiles = []
    for y in ys:
        y2 = min(y + tile_size, h)
        for x in xs:
            x2 = min(x + tile_size, w)
            tiles.append((x, y, img.crop((x, y, x2, y2))))
    return tiles


def iou_bbox(a: List, b: List) -> float:
    """
    Return the IoU (intersection over union) of two bounding boxes.

    Each box is either a polygon ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``
    (any sequence of point-like sequences; its axis-aligned bounding
    rectangle is used) or a flat rectangle ``[x1, y1, x2, y2]``.

    Returns 0.0 when the boxes do not intersect or the union area is zero.
    """
    def _get_rect(box):
        # Points are anything with a length (list, tuple, array row);
        # plain numbers mean the box is already a flat rectangle.
        if hasattr(box[0], "__len__"):
            xs = [p[0] for p in box]
            ys = [p[1] for p in box]
            return min(xs), min(ys), max(xs), max(ys)
        return box[0], box[1], box[2], box[3]

    ax1, ay1, ax2, ay2 = _get_rect(a)
    bx1, by1, bx2, by2 = _get_rect(b)

    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    if ix2 <= ix1 or iy2 <= iy1:
        return 0.0

    inter = (ix2 - ix1) * (iy2 - iy1)
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0


def _ocr_tile(engine, crop, off_x: int, off_y: int, tmp_path: Path,
              conf_threshold: float) -> List[Dict]:
    """Run OCR on one crop and return its lines in full-image coordinates.

    The crop is written to ``tmp_path`` and the engine is called with that
    file path. Lines scoring below ``conf_threshold`` are discarded.
    """
    crop.save(tmp_path)
    res = engine(str(tmp_path))

    lines = []
    if res and res.txts is not None:
        for txt, box, score in zip(res.txts, res.boxes, res.scores):
            if score < conf_threshold:
                continue
            # Shift the bbox by the crop offset so that coordinates refer
            # to the original image rather than to the tile.
            shifted_box = [
                [float(pt[0]) + off_x, float(pt[1]) + off_y] for pt in box
            ]
            lines.append({
                "text": txt,
                "confidence": float(score),
                "bbox": shifted_box
            })
    return lines


def _deduplicate(results: List[Dict], iou_threshold: float = 0.5) -> List[Dict]:
    """Drop overlapping detections, keeping the most confident one.

    Results are visited in order of decreasing confidence; a result is
    dropped when its IoU with an already kept result exceeds
    ``iou_threshold``.
    """
    unique = []
    for cand in sorted(results, key=lambda r: -r["confidence"]):
        if not any(iou_bbox(cand["bbox"], kept["bbox"]) > iou_threshold
                   for kept in unique):
            unique.append(cand)
    return unique


def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200,
                   conf_threshold: float = 0.5) -> List[Dict]:
    """
    Run tiled OCR over an image file and return de-duplicated text lines.

    Args:
        png_path: Path of the image to process.
        tile_size: Tile edge length in pixels, passed to ``make_tiles``.
        overlap: Overlap between neighbouring tiles in pixels.
        conf_threshold: Minimum OCR score for a line to be kept.

    Returns:
        A list of dicts with keys ``"text"``, ``"confidence"`` and
        ``"bbox"`` (list of ``[x, y]`` points in full-image coordinates),
        ordered by decreasing confidence.
    """
    print(f"[INFO] Загрузка {png_path.name}...")
    all_results = []

    # The source image stays open while tiles are processed and is closed
    # afterwards; tile files live in a private directory that is removed on
    # exit, so parallel runs cannot clobber each other's tiles and nothing
    # is left behind in the system temp directory.
    with Image.open(png_path) as img, \
            tempfile.TemporaryDirectory(prefix="tiling_ocr_") as tmp_dir:
        print(f"[INFO] Размер: {img.size}")

        tiles = make_tiles(img, tile_size, overlap)
        print(f"[INFO] Кропов: {len(tiles)}")

        engine = RapidOCR()
        for i, (off_x, off_y, crop) in enumerate(tiles, 1):
            tmp_path = Path(tmp_dir) / f"tile_{i:03d}.png"
            print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
            tile_results = _ocr_tile(engine, crop, off_x, off_y, tmp_path,
                                     conf_threshold)
            all_results.extend(tile_results)
            print(f"{len(tile_results)} lines")

    # De-duplication: when two bboxes overlap (IoU > 0.5) keep the one with
    # the higher confidence.
    print(f"[INFO] Дедупликация {len(all_results)} строк...")
    unique = _deduplicate(all_results, 0.5)
    print(f"[OK] Уникальных строк: {len(unique)}")
    return unique


def main():
    """Command-line entry point.

    Usage: ``tiling_ocr.py <png_path> [tile_size] [overlap]``. Writes the
    OCR result to ``<stem>_tiling_ocr.json`` next to the input file and
    prints every recognised line that looks like a number.
    """
    if len(sys.argv) < 2:
        print("Usage: python tiling_ocr.py <png_path> [tile_size] [overlap]")
        sys.exit(1)

    png_path = Path(sys.argv[1])
    tile_size = int(sys.argv[2]) if len(sys.argv) > 2 else 2000
    overlap = int(sys.argv[3]) if len(sys.argv) > 3 else 200

    results = run_tiling_ocr(png_path, tile_size, overlap)

    # Save the results.
    out_json = png_path.parent / f"{png_path.stem}_tiling_ocr.json"
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump({
            "source": str(png_path),
            "tile_size": tile_size,
            "overlap": overlap,
            "total_lines": len(results),
            "lines": results
        }, f, ensure_ascii=False, indent=2)
    print(f"[OK] Сохранено: {out_json}")

    # Print the numbers: integers or decimals with "," or "." as separator.
    number_re = re.compile(r'^\d+([,.]\d+)?$')
    nums = [r for r in results if number_re.match(r["text"].strip())]
    print(f"\nНайдено {len(nums)} чисел:")
    # Ordered top to bottom by the y coordinate of the first bbox point.
    for n in sorted(nums, key=lambda x: x["bbox"][0][1]):
        bbox = n["bbox"]
        cx = sum(p[0] for p in bbox) / len(bbox)
        cy = sum(p[1] for p in bbox) / len(bbox)
        print(f" {n['text']:>10} x={cx:>8.0f} y={cy:>8.0f} conf={n['confidence']:.2f}")


if __name__ == "__main__":
    main()