161 lines
5.1 KiB
Python
161 lines
5.1 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Visual comparison of regular OCR versus tiling OCR.

Draws bounding boxes in green (regular OCR only), red (tiling OCR only) and yellow (both).
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
from pathlib import Path
|
|||
|
|
from PIL import Image, ImageDraw
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_ocr(path: Path):
    """Read OCR line records from a JSON file.

    Two layouts are recognised:

    * a top-level ``"lines"`` entry, returned as-is;
    * a top-level ``"pages"`` entry, in which case the ``"ocr_lines"`` of
      every page (missing ones count as empty) are concatenated in order.

    Anything else yields an empty list.
    """
    raw = path.read_text(encoding="utf-8")
    payload = json.loads(raw)

    # The flat layout takes precedence over the paged one.
    if "lines" in payload:
        return payload["lines"]

    if "pages" not in payload:
        return []

    return [
        line
        for page in payload["pages"]
        for line in page.get("ocr_lines", [])
    ]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def bbox_center(box):
    """Return the centre ``(x, y)`` of a bounding box.

    Args:
        box: Either a flat ``[x1, y1, x2, y2]`` rectangle or a polygon given
            as a sequence of ``[x, y]`` points. Points may be lists or
            tuples.

    Returns:
        A ``(cx, cy)`` tuple. For a flat rectangle this is the midpoint of
        the two corners; for a polygon it is the mean of its vertices.
    """
    # A nested first element means "sequence of points". Tuples are accepted
    # as well as lists so boxes that did not come straight from JSON work too.
    if isinstance(box[0], (list, tuple)):
        xs = [p[0] for p in box]
        ys = [p[1] for p in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return sum(xs) / len(xs), sum(ys) / len(ys)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def bbox_rect(box):
    """Return the axis-aligned rectangle enclosing a bounding box.

    Args:
        box: Either a flat ``[x1, y1, x2, y2]`` rectangle or a polygon given
            as a sequence of ``[x, y]`` points. Points may be lists or
            tuples.

    Returns:
        A ``(min_x, min_y, max_x, max_y)`` tuple. Corner order in the input
        does not matter.
    """
    # A nested first element means "sequence of points". Tuples are accepted
    # as well as lists so boxes that did not come straight from JSON work too.
    if isinstance(box[0], (list, tuple)):
        xs = [p[0] for p in box]
        ys = [p[1] for p in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return min(xs), min(ys), max(xs), max(ys)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def find_matches(text: str, list_b, iou_thresh=0.3, bbox=None):
    """Find the entries of ``list_b`` that match a given OCR line.

    Args:
        text: Text of the reference line; compared after stripping
            surrounding whitespace.
        list_b: Iterable of OCR line dicts with ``"text"`` and ``"bbox"``
            keys.
        iou_thresh: Minimum intersection-over-union required for a match.
            Only used when ``bbox`` is given.
        bbox: Optional bounding box of the reference line, in any format
            ``bbox_rect`` accepts. When omitted, matching is by text only.

    Returns:
        A list of matching entries. With ``bbox`` the list is ordered by
        decreasing IoU; without it the order of ``list_b`` is preserved.
    """
    wanted = text.strip()
    candidates = [b for b in list_b if b["text"].strip() == wanted]
    if bbox is None:
        # No geometry for the reference line: text equality is all we have.
        return candidates

    ax1, ay1, ax2, ay2 = bbox_rect(bbox)
    area_a = (ax2 - ax1) * (ay2 - ay1)

    scored = []
    for b in candidates:
        bx1, by1, bx2, by2 = bbox_rect(b["bbox"])
        inter_w = min(ax2, bx2) - max(ax1, bx1)
        inter_h = min(ay2, by2) - max(ay1, by1)
        if inter_w <= 0 or inter_h <= 0:
            # Disjoint (or merely touching) boxes have IoU 0.
            continue
        inter = inter_w * inter_h
        area_b = (bx2 - bx1) * (by2 - by1)
        # inter > 0 here, so the union is strictly positive as well.
        iou = inter / (area_a + area_b - inter)
        if iou >= iou_thresh:
            scored.append((iou, b))

    # Stable sort keeps the input order among equally good candidates.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [b for _, b in scored]
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Plain numbers only: digits with an optional "," or "." decimal part.
_NUMERIC_RE = re.compile(r'^\d+([,.]\d+)?$')


def _index_numeric_lines(lines):
    """Group OCR lines whose stripped text is a plain number by that text."""
    index = {}
    for line in lines:
        txt = line["text"].strip()
        if _NUMERIC_RE.match(txt):
            index.setdefault(txt, []).append(line)
    return index


def _pair_by_center(n_list, t_list, max_dist):
    """Greedily pair each normal line with its closest unused tiling line.

    Returns ``(paired, unmatched_normal, unmatched_tiling)`` where ``paired``
    holds ``(normal_line, tiling_line)`` tuples.
    """
    paired = []
    unmatched_n = []
    used_t = set()
    # Tiling centres do not change between iterations; compute them once.
    t_centers = [bbox_center(t["bbox"]) for t in t_list]

    for n in n_list:
        cx_n, cy_n = bbox_center(n["bbox"])
        best = None
        best_dist = float('inf')
        for i, (cx_t, cy_t) in enumerate(t_centers):
            if i in used_t:
                continue
            d = ((cx_n - cx_t) ** 2 + (cy_n - cy_t) ** 2) ** 0.5
            if d < best_dist:
                best_dist = d
                best = i

        if best is not None and best_dist < max_dist:
            paired.append((n, t_list[best]))
            used_t.add(best)
        else:
            unmatched_n.append(n)

    unmatched_t = [t for i, t in enumerate(t_list) if i not in used_t]
    return paired, unmatched_n, unmatched_t


def _first_y(item):
    """Return the y of the box's first corner for either bbox layout."""
    box = item["bbox"]
    if isinstance(box[0], (list, tuple)):
        return box[0][1]
    return box[1]


def visualize_comparison(png_path: Path, normal_ocr_path: Path, tiling_ocr_path: Path, out_path: Path, max_dist: float = 100):
    """Draw a colour-coded comparison of two OCR results over an image.

    Only lines whose text is a plain number are compared. Lines with equal
    text are paired by the distance between their box centres.

    Colours:
        green  - found by the normal OCR only;
        red    - found by the tiling OCR only (also labelled with its text);
        yellow - found by both (the tiling box is drawn).

    Args:
        png_path: Image to annotate.
        normal_ocr_path: JSON file with the normal OCR result.
        tiling_ocr_path: JSON file with the tiling OCR result.
        out_path: Where the annotated image is written.
        max_dist: Centre distance below which two lines with the same text
            count as the same detection.

    Side effects:
        Writes ``out_path`` and prints a summary plus the list of numbers
        found only by the tiling OCR to stdout.
    """
    normal_by_text = _index_numeric_lines(load_ocr(normal_ocr_path))
    tiling_by_text = _index_numeric_lines(load_ocr(tiling_ocr_path))

    only_normal = []  # green
    only_tiling = []  # red
    both = []  # yellow

    # Sorted so the result lists do not depend on set iteration order.
    for txt in sorted(normal_by_text.keys() | tiling_by_text.keys()):
        paired, unmatched_n, unmatched_t = _pair_by_center(
            normal_by_text.get(txt, []),
            tiling_by_text.get(txt, []),
            max_dist,
        )
        both.extend(paired)
        only_normal.extend(unmatched_n)
        only_tiling.extend(unmatched_t)

    # The context manager releases the underlying file handle when done.
    with Image.open(png_path) as src:
        # On bilevel/grayscale canvases colour names are reduced to gray
        # levels, which would make the three categories indistinguishable.
        img = src.convert("RGB") if src.mode in ("1", "L") else src
        draw = ImageDraw.Draw(img)

        for item in only_normal:
            x1, y1, x2, y2 = bbox_rect(item["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="green", width=3)

        for item in only_tiling:
            x1, y1, x2, y2 = bbox_rect(item["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
            cx, cy = bbox_center(item["bbox"])
            draw.text((cx, cy - 15), item["text"], fill="red")

        for _, t in both:
            # Use the tiling bbox for detections present in both results.
            x1, y1, x2, y2 = bbox_rect(t["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="yellow", width=2)

        img.save(out_path)

    print(f"[OK] Сохранено: {out_path}")
    print(f" Только обычный (зелёный): {len(only_normal)}")
    print(f" Только tiling (красный): {len(only_tiling)}")
    print(f" Оба (жёлтый): {len(both)}")

    # List the numbers that only the tiling OCR found, top to bottom.
    # _first_y handles both polygon and flat [x1, y1, x2, y2] boxes.
    print("\nНовые числа от tiling OCR:")
    for item in sorted(only_tiling, key=_first_y):
        cx, cy = bbox_center(item["bbox"])
        print(f" {item['text']:>10} x={cx:>8.0f} y={cy:>8.0f}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point.

    Expects three positional arguments: the page image, the normal OCR JSON
    and the tiling OCR JSON. The annotated image is written next to the
    input image as ``<stem>_ocr_compare.png``. With fewer arguments a usage
    line is printed and the process exits with status 1.
    """
    argv = sys.argv
    if len(argv) < 4:
        print("Usage: python compare_ocr.py <png> <normal_ocr.json> <tiling_ocr.json>")
        sys.exit(1)

    png, normal, tiling = (Path(arg) for arg in argv[1:4])
    out = png.parent / f"{png.stem}_ocr_compare.png"

    visualize_comparison(png, normal, tiling, out)


if __name__ == "__main__":
    main()
|