#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Tiling OCR for large drawings.

Cuts a PNG into overlapping crops, runs OCR on each crop and
merges the results with deduplication.

Effect: every crop is presented to the OCR engine at a "larger" scale,
so small text occupies a bigger share of the crop's area.
"""
|
||
|
||
import sys
|
||
import json
|
||
import re
|
||
import tempfile
|
||
from pathlib import Path
|
||
from typing import List, Dict, Tuple
|
||
from PIL import Image
|
||
from rapidocr import RapidOCR
|
||
|
||
|
||
def make_tiles(img: "Image.Image", tile_size: int = 2000, overlap: int = 200) -> List[Tuple[int, int, "Image.Image"]]:
    """
    Split an image into overlapping tiles.

    Args:
        img: Source image. Only ``img.size`` and ``img.crop`` are used.
        tile_size: Maximum tile width and height, in pixels.
        overlap: Number of pixels shared by neighbouring tiles.

    Returns:
        A list of ``(offset_x, offset_y, cropped_image)`` tuples in
        row-major order (left to right, then top to bottom). Tiles on the
        right/bottom border are clipped to the image bounds.

    Raises:
        ValueError: If ``tile_size`` is not positive, or if ``overlap`` is
            not smaller than ``tile_size`` (the tiling step would be zero
            or negative).
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")
    if overlap >= tile_size:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than tile_size ({tile_size})"
        )

    step = tile_size - overlap

    def _starts(length):
        # Tile origins along one axis. Stop as soon as a tile reaches the
        # image edge: any further origin would produce a sliver that lies
        # entirely inside the previous tile and only yields clipped text.
        starts = []
        pos = 0
        while pos < length:
            starts.append(pos)
            if pos + tile_size >= length:
                break
            pos += step
        return starts

    w, h = img.size
    x_starts = _starts(w)
    return [
        (x, y, img.crop((x, y, min(x + tile_size, w), min(y + tile_size, h))))
        for y in _starts(h)
        for x in x_starts
    ]
|
||
|
||
|
||
def iou_bbox(a: List, b: List) -> float:
    """
    Return the intersection-over-union of two bounding boxes.

    Each box is either a polygon ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``
    (points may be lists, tuples or any other sized sequence) or a flat
    rectangle ``[x1, y1, x2, y2]``. Polygons are reduced to their
    axis-aligned bounding rectangle before the IoU is computed.

    Returns:
        ``intersection / union`` of the two rectangles, or ``0.0`` when they
        do not overlap or the union has no area.
    """
    def _get_rect(box):
        # A sized first element means the box is a sequence of points;
        # plain numbers have no ``__len__`` and denote a flat rectangle.
        if hasattr(box[0], "__len__"):
            xs = [p[0] for p in box]
            ys = [p[1] for p in box]
            return min(xs), min(ys), max(xs), max(ys)
        return box[0], box[1], box[2], box[3]

    ax1, ay1, ax2, ay2 = _get_rect(a)
    bx1, by1, bx2, by2 = _get_rect(b)

    # Corners of the intersection rectangle.
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)

    if ix2 <= ix1 or iy2 <= iy1:
        return 0.0

    inter = (ix2 - ix1) * (iy2 - iy1)
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0
|
||
|
||
|
||
def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, conf_threshold: float = 0.5):
    """
    Run OCR over an image tile by tile and merge the recognised lines.

    The image is split with ``make_tiles``; every tile is written to a
    private temporary directory and passed to a ``RapidOCR`` engine by file
    path. Recognised boxes are shifted by the tile offset so that they are
    expressed in full-image coordinates, then de-duplicated.

    Args:
        png_path: Path to the source image.
        tile_size: Tile edge length forwarded to ``make_tiles``.
        overlap: Tile overlap forwarded to ``make_tiles``.
        conf_threshold: Lines whose score is below this value are dropped.

    Returns:
        A list of dicts with keys ``"text"``, ``"confidence"`` and ``"bbox"``
        (a list of ``[x, y]`` points), ordered by descending confidence.
        When two boxes overlap with IoU > 0.5 only the more confident one
        is kept.
    """
    print(f"[INFO] Загрузка {png_path.name}...")
    all_results = []

    # The image handle is closed on exit, and the tiles live in a private
    # directory that is removed afterwards: nothing is left behind in the
    # shared temp dir and concurrent runs cannot overwrite each other's tiles.
    with Image.open(png_path) as img, \
            tempfile.TemporaryDirectory(prefix="tiling_ocr_") as tmp_dir:
        print(f"[INFO] Размер: {img.size}")

        tiles = make_tiles(img, tile_size, overlap)
        print(f"[INFO] Кропов: {len(tiles)}")

        engine = RapidOCR()

        for i, (off_x, off_y, crop) in enumerate(tiles, 1):
            # The engine is invoked with a file path, so persist the crop first.
            tmp_path = Path(tmp_dir) / f"tile_{i:03d}.png"
            crop.save(tmp_path)

            print(f"  [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
            res = engine(str(tmp_path))

            tile_lines = 0
            if res and res.txts is not None:
                for txt, box, score in zip(res.txts, res.boxes, res.scores):
                    if score < conf_threshold:
                        continue
                    # Translate the box from tile to full-image coordinates.
                    shifted_box = [
                        [float(pt[0]) + off_x, float(pt[1]) + off_y]
                        for pt in box
                    ]
                    all_results.append({
                        "text": txt,
                        "confidence": float(score),
                        "bbox": shifted_box
                    })
                    tile_lines += 1
            print(f"{tile_lines} lines")

    # De-duplication: visit candidates from most to least confident and keep
    # one only if it does not overlap (IoU > 0.5) an already accepted box.
    print(f"[INFO] Дедупликация {len(all_results)} строк...")
    unique = []
    for r in sorted(all_results, key=lambda x: -x["confidence"]):
        if not any(iou_bbox(r["bbox"], u["bbox"]) > 0.5 for u in unique):
            unique.append(r)

    print(f"[OK] Уникальных строк: {len(unique)}")
    return unique
|
||
|
||
|
||
def main():
    """
    Command-line entry point.

    Usage: ``python tiling_ocr.py <png> [tile_size] [overlap]``.

    Runs the tiling OCR on the given image, writes the recognised lines to
    ``<stem>_tiling_ocr.json`` next to the image, and prints every line
    whose text is a plain number (optionally with a ``,`` or ``.`` decimal
    part), sorted by the y coordinate of the first bbox point.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python tiling_ocr.py <png> [tile_size] [overlap]")
        sys.exit(1)

    png_path = Path(cli_args[0])
    tile_size = 2000
    if len(cli_args) > 1:
        tile_size = int(cli_args[1])
    overlap = 200
    if len(cli_args) > 2:
        overlap = int(cli_args[2])

    results = run_tiling_ocr(png_path, tile_size, overlap)

    # Persist the results
    report = {
        "source": str(png_path),
        "tile_size": tile_size,
        "overlap": overlap,
        "total_lines": len(results),
        "lines": results
    }
    out_json = png_path.parent / f"{png_path.stem}_tiling_ocr.json"
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"[OK] Сохранено: {out_json}")

    # Print the numbers
    number_pattern = re.compile(r'^\d+([,.]\d+)?$')
    nums = []
    for line in results:
        if number_pattern.match(line["text"].strip()):
            nums.append(line)
    print(f"\nНайдено {len(nums)} чисел:")

    def _first_point_y(item):
        return item["bbox"][0][1]

    nums.sort(key=_first_point_y)
    for n in nums:
        points = n["bbox"]
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        cx = sum(xs) / len(points)
        cy = sum(ys) / len(points)
        print(f"  {n['text']:>10}  x={cx:>8.0f} y={cy:>8.0f} conf={n['confidence']:.2f}")
|
||
|
||
|
||
# Run the command-line entry point only when the file is executed as a script.
if __name__ == "__main__":
    main()
|