158 lines
5.3 KiB
Python
158 lines
5.3 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Tiling OCR for large drawings.

Cuts a PNG into overlapping crops, runs OCR on each one,
and merges the results with deduplication.

Effect: each crop is presented to OCR at a "larger" scale — small text
occupies a larger share of the crop's area.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
import re
import sys
import tempfile
from pathlib import Path
from typing import List, Dict, Tuple

from PIL import Image
from rapidocr_onnxruntime import RapidOCR
|
|||
|
|
|
|||
|
|
|
|||
|
|
def make_tiles(
    img: "Image.Image", tile_size: int = 2000, overlap: int = 200
) -> List[Tuple[int, int, "Image.Image"]]:
    """Cut an image into overlapping tiles.

    Tile origins lie on a grid with stride ``tile_size - overlap``. Tiles
    that would extend past the right/bottom border are clipped to the image
    bounds, so edge tiles may be smaller than ``tile_size``.

    A grid origin is skipped when the preceding tile along that axis already
    reaches the image border: such a tile would lie entirely inside its
    neighbour and only cost an extra OCR pass.

    Args:
        img: Image to cut; only ``img.size`` and ``img.crop(box)`` are used.
        tile_size: Maximum side length of a tile, in pixels. Must be > 0.
        overlap: Number of pixels shared by neighbouring tiles.
            Must satisfy ``0 <= overlap < tile_size``.

    Returns:
        ``[(offset_x, offset_y, cropped_image), ...]`` in row-major order,
        where the offsets are the tile's top-left corner in ``img``.

    Raises:
        ValueError: If ``tile_size`` or ``overlap`` is out of range. Without
            this check the stride would be zero (``range()`` error) or
            negative (silently no tiles at all).
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")
    if not 0 <= overlap < tile_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < tile_size, "
            f"got overlap={overlap}, tile_size={tile_size}"
        )

    w, h = img.size
    step = tile_size - overlap

    def _origins(extent: int) -> List[int]:
        # The tile preceding origin ``o`` starts at ``o - step`` and therefore
        # ends at ``o + overlap``. If that already reaches ``extent``, a tile
        # at ``o`` would be fully contained in it, so it is dropped.
        return [o for o in range(0, extent, step) if o == 0 or o + overlap < extent]

    xs = _origins(w)
    ys = _origins(h)

    return [
        (x, y, img.crop((x, y, min(x + tile_size, w), min(y + tile_size, h))))
        for y in ys
        for x in xs
    ]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def iou_bbox(a: List, b: List) -> float:
    """Return the intersection-over-union (IoU) of two boxes.

    Each box may be given either as a sequence of points
    ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]`` (points may be lists or
    tuples) or as a flat rectangle ``[x1, y1, x2, y2]``. Point boxes are
    reduced to their axis-aligned bounding rectangle before comparison.

    Args:
        a: First box.
        b: Second box.

    Returns:
        IoU in ``[0.0, 1.0]``; ``0.0`` when the boxes do not overlap or when
        the union has zero area.
    """

    def _to_rect(box):
        # Accept tuple points as well as list points; checking only for
        # ``list`` would misread a box of tuples as a flat rectangle.
        if isinstance(box[0], (list, tuple)):
            xs = [p[0] for p in box]
            ys = [p[1] for p in box]
            return min(xs), min(ys), max(xs), max(ys)
        return box[0], box[1], box[2], box[3]

    ax1, ay1, ax2, ay2 = _to_rect(a)
    bx1, by1, bx2, by2 = _to_rect(b)

    # Corners of the intersection rectangle.
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)

    # Empty or degenerate intersection: the boxes do not overlap.
    if ix2 <= ix1 or iy2 <= iy1:
        return 0.0

    inter = (ix2 - ix1) * (iy2 - iy1)
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, conf_threshold: float = 0.5):
    """Run OCR over an image tile by tile and return de-duplicated text lines.

    The image is cut with ``make_tiles``, every tile is passed to a
    ``RapidOCR`` engine, detections below ``conf_threshold`` are dropped, and
    the remaining boxes are shifted into full-image coordinates. Detections
    whose boxes overlap with IoU > 0.5 are treated as duplicates; the one
    with the higher confidence is kept.

    Args:
        png_path: Path of the image file to process.
        tile_size: Tile side length passed to ``make_tiles``.
        overlap: Tile overlap passed to ``make_tiles``.
        conf_threshold: Minimum confidence a detection needs to be kept.

    Returns:
        A list of dicts with keys ``"text"``, ``"confidence"`` (float) and
        ``"bbox"`` (list of ``[x, y]`` points in full-image coordinates),
        ordered by descending confidence.
    """
    print(f"[INFO] Загрузка {png_path.name}...")
    # The context manager releases the image file handle when processing ends;
    # the tiles are consumed inside it so they never outlive their source.
    with Image.open(png_path) as img:
        print(f"[INFO] Размер: {img.size}")

        tiles = make_tiles(img, tile_size, overlap)
        print(f"[INFO] Кропов: {len(tiles)}")

        engine = RapidOCR()
        all_results = _ocr_tiles(engine, tiles, conf_threshold)

    print(f"[INFO] Дедупликация {len(all_results)} строк...")
    unique = _deduplicate_lines(all_results)

    print(f"[OK] Уникальных строк: {len(unique)}")
    return unique


def _ocr_tiles(engine, tiles, conf_threshold):
    """OCR every tile and return accepted lines in full-image coordinates."""
    lines = []
    total = len(tiles)

    # A private temporary directory replaces fixed /tmp/tile_NNN.png names:
    # it works on any platform, cannot collide with a concurrent run, and is
    # removed together with all tile files when the block exits.
    with tempfile.TemporaryDirectory(prefix="tiling_ocr_") as tmp_dir:
        for i, (off_x, off_y, crop) in enumerate(tiles, 1):
            tmp_path = str(Path(tmp_dir) / f"tile_{i:03d}.png")
            crop.save(tmp_path)

            print(f" [{i}/{total}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
            res = engine(tmp_path)

            # The engine result is indexed as ``res[0]`` = list of
            # (box, text, score) items; it may be empty/None for a blank tile.
            items = res[0] if res and res[0] else ()

            tile_lines = 0
            for box, txt, score in items:
                # Convert before comparing so a non-float score (e.g. a
                # string — NOTE(review): confirm against the installed
                # RapidOCR version) cannot break the threshold check.
                score = float(score)
                if score < conf_threshold:
                    continue
                lines.append({
                    "text": txt,
                    "confidence": score,
                    # Shift the tile-local box by the tile's offset.
                    "bbox": [[pt[0] + off_x, pt[1] + off_y] for pt in box],
                })
                tile_lines += 1
            print(f"{tile_lines} lines")

    return lines


def _deduplicate_lines(lines, iou_threshold=0.5):
    """Drop lines whose box overlaps an already kept, more confident line."""
    unique = []
    # Highest confidence first, so the best detection of each group is the
    # one that gets kept.
    for candidate in sorted(lines, key=lambda x: -x["confidence"]):
        if not any(iou_bbox(candidate["bbox"], kept["bbox"]) > iou_threshold for kept in unique):
            unique.append(candidate)
    return unique
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point.

    Usage: ``python tiling_ocr.py <png> [tile_size] [overlap]``.

    Runs ``run_tiling_ocr`` on the given image, writes the result to
    ``<stem>_tiling_ocr.json`` next to the input file, and prints every
    recognized line that looks like a plain number together with the centre
    of its box and its confidence.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python tiling_ocr.py <png> [tile_size] [overlap]")
        sys.exit(1)

    png_path = Path(args[0])
    tile_size = int(args[1]) if len(args) > 1 else 2000
    overlap = int(args[2]) if len(args) > 2 else 200

    results = run_tiling_ocr(png_path, tile_size, overlap)

    # Save the results next to the source image.
    out_json = png_path.parent / f"{png_path.stem}_tiling_ocr.json"
    payload = {
        "source": str(png_path),
        "tile_size": tile_size,
        "overlap": overlap,
        "total_lines": len(results),
        "lines": results
    }
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    print(f"[OK] Сохранено: {out_json}")

    # Report the lines that consist of a single integer or decimal number.
    number_pattern = re.compile(r'^\d+([,.]\d+)?$')
    nums = [line for line in results if number_pattern.match(line["text"].strip())]
    print(f"\nНайдено {len(nums)} чисел:")

    # Order top to bottom by the y coordinate of each box's first point.
    nums.sort(key=lambda line: line["bbox"][0][1])
    for entry in nums:
        points = entry["bbox"]
        count = len(points)
        cx = sum(pt[0] for pt in points) / count
        cy = sum(pt[1] for pt in points) / count
        print(f" {entry['text']:>10} x={cx:>8.0f} y={cy:>8.0f} conf={entry['confidence']:.2f}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
|