#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tiled OCR for large drawings.

Cuts a PNG into overlapping crops, runs OCR on every crop and merges the
per-crop results with de-duplication.

Rationale: each crop is effectively "zoomed in" for the OCR engine, because
small text occupies a larger share of a crop than of the whole sheet.
"""

import sys
import json
import re
import tempfile
from numbers import Real
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Sequence, Tuple

if TYPE_CHECKING:
    # Needed for annotations only.  Pillow and RapidOCR are imported inside
    # run_tiling_ocr() so that the pure helpers below (tiling, IoU) stay
    # importable on machines where the OCR stack is not installed.
    from PIL import Image

# Text that is a plain number, optionally with one ',' or '.' decimal separator.
_NUMBER_RE = re.compile(r'^\d+([,.]\d+)?$')


def make_tiles(img: "Image.Image", tile_size: int = 2000,
               overlap: int = 200) -> List[Tuple[int, int, "Image.Image"]]:
    """Cut *img* into overlapping crops, row by row.

    Args:
        img: Source image; only ``img.size`` and ``img.crop(box)`` are used.
        tile_size: Maximum width and height of a crop, in pixels.
        overlap: Number of pixels shared by neighbouring crops.

    Returns:
        ``[(offset_x, offset_y, cropped_image), ...]`` where the offsets are
        the crop's top-left corner in the coordinates of *img*.  Crops on the
        right/bottom edge are clipped to the image and may be smaller than
        *tile_size*.

    Raises:
        ValueError: If *tile_size* is not positive or *overlap* is not in
            ``[0, tile_size)`` (the stride would be zero or negative).
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")
    if not 0 <= overlap < tile_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < tile_size, "
            f"got overlap={overlap}, tile_size={tile_size}"
        )

    width, height = img.size
    if width <= 0 or height <= 0:
        return []

    step = tile_size - overlap
    # A tile starting at x > 0 is only useful when the previous tile stopped
    # short of the image edge, i.e. when x + overlap < width.  Stopping the
    # range there avoids emitting slivers that lie entirely inside the
    # previous tile and would only be OCR'd a second time.
    x_starts = range(0, max(width - overlap, 1), step)
    y_starts = range(0, max(height - overlap, 1), step)

    tiles = []
    for top in y_starts:
        bottom = min(top + tile_size, height)
        for left in x_starts:
            right = min(left + tile_size, width)
            tiles.append((left, top, img.crop((left, top, right, bottom))))
    return tiles


def _bbox_to_rect(box: Sequence) -> Tuple[float, float, float, float]:
    """Return the axis-aligned ``(x1, y1, x2, y2)`` rectangle enclosing *box*.

    *box* is either a flat ``[x1, y1, x2, y2]`` sequence or a sequence of
    ``(x, y)`` points.  Points may be lists, tuples or array rows: the shape
    is decided by whether the first element is a number, not by its
    container type.
    """
    if isinstance(box[0], Real):
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
        # Normalise so that swapped corners cannot produce negative areas.
        return min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)
    xs = [pt[0] for pt in box]
    ys = [pt[1] for pt in box]
    return min(xs), min(ys), max(xs), max(ys)


def iou_bbox(a: Sequence, b: Sequence) -> float:
    """Return the intersection-over-union of two bounding boxes.

    Each box is either a quadrilateral ``[[x1,y1],[x2,y2],[x3,y3],[x4,y4]]``
    (reduced to its axis-aligned bounding rectangle) or a flat
    ``[x1, y1, x2, y2]`` rectangle.

    Returns:
        A value in ``[0.0, 1.0]``; ``0.0`` when the rectangles do not overlap
        or the union has no area.
    """
    ax1, ay1, ax2, ay2 = _bbox_to_rect(a)
    bx1, by1, bx2, by2 = _bbox_to_rect(b)

    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    if ix2 <= ix1 or iy2 <= iy1:
        return 0.0

    inter = (ix2 - ix1) * (iy2 - iy1)
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0


def _iter_ocr_items(res: Any) -> Iterator[Tuple[Any, str, float]]:
    """Yield ``(box, text, score)`` triples from one OCR engine result.

    Two result layouts are accepted:

    * the tuple layout the original code relied on, where ``res[0]`` is a
      list of ``[box, text, score]`` items (or is empty/None);
    * an object exposing parallel ``boxes`` / ``txts`` / ``scores``
      attributes.  NOTE(review): this is understood to be what newer
      ``rapidocr`` releases return - confirm against the installed version.
    """
    if res is None:
        return

    if hasattr(res, "txts"):
        boxes = getattr(res, "boxes", None)
        txts = getattr(res, "txts", None)
        scores = getattr(res, "scores", None)
        if boxes is None or txts is None or scores is None:
            return  # nothing detected on this tile
        yield from zip(boxes, txts, scores)
        return

    if res and res[0]:
        for box, txt, score in res[0]:
            yield box, txt, score


def _dedupe_lines(lines: List[Dict[str, Any]],
                  iou_threshold: float = 0.5) -> List[Dict[str, Any]]:
    """Drop overlapping detections, keeping the most confident one.

    Lines are visited in descending confidence order; a line is kept only if
    its bbox has IoU <= *iou_threshold* with every line already kept.

    NOTE(review): a text fragment cut off by a tile border can overlap the
    full detection from the neighbouring tile by less than the threshold and
    therefore survive as a separate line.
    """
    kept: List[Dict[str, Any]] = []
    ranked = sorted(lines, key=lambda r: r["confidence"], reverse=True)
    for cand in ranked:
        if all(iou_bbox(cand["bbox"], k["bbox"]) <= iou_threshold for k in kept):
            kept.append(cand)
    return kept


def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200,
                   conf_threshold: float = 0.5,
                   iou_threshold: float = 0.5) -> List[Dict[str, Any]]:
    """Run OCR tile by tile over *png_path* and return de-duplicated lines.

    Args:
        png_path: Image to process.
        tile_size: Crop size passed to :func:`make_tiles`.
        overlap: Crop overlap passed to :func:`make_tiles`.
        conf_threshold: Detections with a score below this are discarded.
        iou_threshold: Two detections whose boxes overlap with IoU above this
            are treated as the same line (the more confident one is kept).

    Returns:
        A list of ``{"text", "confidence", "bbox"}`` dicts; ``bbox`` is a list
        of ``[x, y]`` points in full-image coordinates.

    Raises:
        ValueError: Propagated from :func:`make_tiles` for invalid
            *tile_size* / *overlap*.
    """
    # Imported here rather than at module level; see the note by the
    # TYPE_CHECKING block at the top of the file.
    from PIL import Image
    from rapidocr import RapidOCR

    png_path = Path(png_path)
    print(f"[INFO] Загрузка {png_path.name}...")

    all_results: List[Dict[str, Any]] = []
    # The image handle is closed on exit, and the per-tile PNGs live in a
    # private temporary directory that is removed afterwards (no fixed /tmp
    # paths, so concurrent runs cannot overwrite each other's tiles).
    with Image.open(png_path) as img, \
            tempfile.TemporaryDirectory(prefix="tiling_ocr_") as tmp_dir:
        print(f"[INFO] Размер: {img.size}")

        tiles = make_tiles(img, tile_size, overlap)
        print(f"[INFO] Кропов: {len(tiles)}")

        engine = RapidOCR()

        for i, (off_x, off_y, crop) in enumerate(tiles, 1):
            # The engine is given a file path, so store the crop temporarily.
            tmp_path = str(Path(tmp_dir) / f"tile_{i:03d}.png")
            crop.save(tmp_path)

            print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
            res = engine(tmp_path)

            tile_lines = 0
            for box, txt, score in _iter_ocr_items(res):
                if score < conf_threshold:
                    continue
                # Shift the bbox from crop coordinates to full-image
                # coordinates.  float() keeps the values JSON-serialisable
                # even if the engine hands back numpy scalars.
                shifted_box = [
                    [float(pt[0]) + off_x, float(pt[1]) + off_y] for pt in box
                ]
                all_results.append({
                    "text": txt,
                    "confidence": float(score),
                    "bbox": shifted_box
                })
                tile_lines += 1
            print(f"{tile_lines} lines")

    # De-duplication: the overlap zones are seen by more than one tile.
    print(f"[INFO] Дедупликация {len(all_results)} строк...")
    unique = _dedupe_lines(all_results, iou_threshold)

    print(f"[OK] Уникальных строк: {len(unique)}")
    return unique


def main():
    """CLI entry point: ``tiling_ocr.py <image.png> [tile_size] [overlap]``.

    Writes ``<stem>_tiling_ocr.json`` next to the input image and prints the
    detected lines that look like plain numbers, ordered by the y coordinate
    of their first bbox point.
    """
    usage = "Usage: python tiling_ocr.py <image.png> [tile_size] [overlap]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    png_path = Path(sys.argv[1])
    try:
        tile_size = int(sys.argv[2]) if len(sys.argv) > 2 else 2000
        overlap = int(sys.argv[3]) if len(sys.argv) > 3 else 200
    except ValueError:
        # Non-integer tile_size / overlap: show usage instead of a traceback.
        print(usage)
        sys.exit(1)

    results = run_tiling_ocr(png_path, tile_size, overlap)

    # Save the results.
    out_json = png_path.parent / f"{png_path.stem}_tiling_ocr.json"
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump({
            "source": str(png_path),
            "tile_size": tile_size,
            "overlap": overlap,
            "total_lines": len(results),
            "lines": results
        }, f, ensure_ascii=False, indent=2)
    print(f"[OK] Сохранено: {out_json}")

    # Print the numbers.
    nums = [r for r in results if _NUMBER_RE.match(r["text"].strip())]
    print(f"\nНайдено {len(nums)} чисел:")
    for n in sorted(nums, key=lambda x: x["bbox"][0][1]):
        bbox = n["bbox"]
        cx = sum(p[0] for p in bbox) / len(bbox)
        cy = sum(p[1] for p in bbox) / len(bbox)
        print(f" {n['text']:>10} x={cx:>8.0f} y={cy:>8.0f} conf={n['confidence']:.2f}")


if __name__ == "__main__":
    main()