opencode/compare_ocr.py

161 lines
5.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Визуализация сравнения: обычный OCR vs tiling OCR.
Рисует bbox зелёным (только обычный), красным (только tiling), жёлтым (оба).
"""
import sys
import json
import re
from pathlib import Path
from PIL import Image, ImageDraw
def load_ocr(path: Path):
    """Read OCR line records from a JSON file.

    Two layouts are recognised:

    * a top-level ``"lines"`` key, whose value is returned as-is;
    * a top-level ``"pages"`` key, in which case the ``"ocr_lines"`` of
      every page are concatenated in page order (a page without that key
      contributes nothing).

    ``"lines"`` takes precedence when both keys are present. Any other
    layout yields an empty list.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))

    if "lines" in payload:
        return payload["lines"]
    if "pages" not in payload:
        return []

    return [
        record
        for page in payload["pages"]
        for record in page.get("ocr_lines", [])
    ]
def bbox_center(box):
    """Return the ``(x, y)`` centre of a bounding box.

    Args:
        box: Either a polygon given as a sequence of ``[x, y]`` points, or
            a flat ``[x1, y1, x2, y2]`` rectangle.

    Returns:
        A ``(cx, cy)`` tuple: the mean of the polygon's vertices, or the
        midpoint of the two corners of a flat rectangle.
    """
    # Accept tuple points as well as list points: with a list-only check a
    # polygon of tuples would be misread as a flat rectangle.
    if isinstance(box[0], (list, tuple)):
        xs = [pt[0] for pt in box]
        ys = [pt[1] for pt in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return sum(xs) / len(xs), sum(ys) / len(ys)
def bbox_rect(box):
    """Return the axis-aligned bounds ``(x_min, y_min, x_max, y_max)`` of a box.

    Args:
        box: Either a polygon given as a sequence of ``[x, y]`` points, or
            a flat ``[x1, y1, x2, y2]`` rectangle (corners in any order).

    Returns:
        A 4-tuple ``(x_min, y_min, x_max, y_max)``.
    """
    # Accept tuple points as well as list points: with a list-only check a
    # polygon of tuples would be misread as a flat rectangle and min()/max()
    # would silently return whole points instead of coordinates.
    if isinstance(box[0], (list, tuple)):
        xs = [pt[0] for pt in box]
        ys = [pt[1] for pt in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return min(xs), min(ys), max(xs), max(ys)
def find_matches(text: str, list_b, iou_thresh=0.3, bbox=None):
    """Find the entries of ``list_b`` that match ``text`` (and optionally a box).

    Args:
        text: Text to look for; compared after stripping whitespace on both
            sides.
        list_b: Iterable of OCR line dicts with ``"text"`` and ``"bbox"`` keys.
        iou_thresh: Minimum intersection-over-union required when ``bbox``
            is given. Ignored otherwise.
        bbox: Optional reference box (polygon or flat rectangle). When
            omitted, matching is by text only.

    Returns:
        A list of matching entries from ``list_b``. Without ``bbox`` they keep
        their original order; with ``bbox`` they are ordered by decreasing IoU.
    """
    wanted = text.strip()
    same_text = [b for b in list_b if b["text"].strip() == wanted]
    if bbox is None:
        # No reference geometry available: text equality is all we can check.
        return same_text

    ax1, ay1, ax2, ay2 = bbox_rect(bbox)
    area_a = (ax2 - ax1) * (ay2 - ay1)

    scored = []
    for cand in same_text:
        bx1, by1, bx2, by2 = bbox_rect(cand["bbox"])
        # Clamp at zero so disjoint boxes give an empty intersection.
        inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
        inter_h = max(0, min(ay2, by2) - max(ay1, by1))
        inter = inter_w * inter_h
        union = area_a + (bx2 - bx1) * (by2 - by1) - inter
        # Two degenerate (zero-area) boxes have no meaningful overlap ratio.
        iou = inter / union if union > 0 else 0.0
        if iou >= iou_thresh:
            scored.append((iou, cand))

    # sort() is stable, so candidates with equal IoU keep their input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [cand for _, cand in scored]
def _index_numeric_lines(lines):
    """Group OCR lines whose stripped text is a plain number, keyed by that text."""
    numeric = re.compile(r'\d+([,.]\d+)?')
    index = {}
    for line in lines:
        txt = line["text"].strip()
        if numeric.fullmatch(txt):
            index.setdefault(txt, []).append(line)
    return index


def _pair_by_center(n_list, t_list, max_dist):
    """Greedily pair each normal line with its nearest still-unused tiling line.

    Returns ``(pairs, unmatched_normal, unmatched_tiling)``.
    """
    # Tiling centres do not change between iterations, so compute them once.
    t_centers = [bbox_center(t["bbox"]) for t in t_list]
    used = set()
    pairs = []
    unmatched_n = []

    for n in n_list:
        cx_n, cy_n = bbox_center(n["bbox"])
        best = None
        best_dist = float("inf")
        for i, (cx_t, cy_t) in enumerate(t_centers):
            if i in used:
                continue
            d = ((cx_n - cx_t) ** 2 + (cy_n - cy_t) ** 2) ** 0.5
            if d < best_dist:
                best_dist = d
                best = i
        if best is not None and best_dist < max_dist:
            pairs.append((n, t_list[best]))
            used.add(best)
        else:
            unmatched_n.append(n)

    unmatched_t = [t for i, t in enumerate(t_list) if i not in used]
    return pairs, unmatched_n, unmatched_t


def visualize_comparison(png_path: Path, normal_ocr_path: Path, tiling_ocr_path: Path, out_path: Path, max_dist: float = 100):
    """Draw a colour-coded comparison of two OCR results onto an image.

    Only lines whose text is a plain number (digits with an optional ``,`` or
    ``.`` fraction) are compared. Lines with identical text are paired
    greedily by centre distance. Boxes are outlined in green (normal OCR
    only), red with a text label (tiling OCR only) or yellow (found by both,
    using the tiling box). A summary and the list of tiling-only numbers are
    printed to stdout.

    Args:
        png_path: Image the OCR was run on.
        normal_ocr_path: JSON file with the normal OCR result.
        tiling_ocr_path: JSON file with the tiling OCR result.
        out_path: Where the annotated image is written.
        max_dist: Two same-text boxes count as the same detection when their
            centres are strictly closer than this distance (image pixels).
    """
    normal_by_text = _index_numeric_lines(load_ocr(normal_ocr_path))
    tiling_by_text = _index_numeric_lines(load_ocr(tiling_ocr_path))

    only_normal = []  # green
    only_tiling = []  # red
    both = []         # yellow

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # drawing order is reproducible (iterating a set of strings is not).
    for txt in dict.fromkeys([*normal_by_text, *tiling_by_text]):
        pairs, left_n, left_t = _pair_by_center(
            normal_by_text.get(txt, []),
            tiling_by_text.get(txt, []),
            max_dist,
        )
        both.extend(pairs)
        only_normal.extend(left_n)
        only_tiling.extend(left_t)

    with Image.open(png_path) as src:
        # Colour names are reduced to a gray level on grayscale/bilevel
        # images and need free palette slots on palette images, so draw on
        # an RGB copy unless the image is already true-colour.
        img = src if src.mode in ("RGB", "RGBA") else src.convert("RGB")
        draw = ImageDraw.Draw(img)

        for item in only_normal:
            x1, y1, x2, y2 = bbox_rect(item["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="green", width=3)

        for item in only_tiling:
            x1, y1, x2, y2 = bbox_rect(item["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
            cx, cy = bbox_center(item["bbox"])
            draw.text((cx, cy - 15), item["text"], fill="red")

        for _, t in both:
            # Use the tiling bbox for detections found by both passes.
            x1, y1, x2, y2 = bbox_rect(t["bbox"])
            draw.rectangle([x1, y1, x2, y2], outline="yellow", width=2)

        img.save(out_path)

    print(f"[OK] Сохранено: {out_path}")
    print(f" Только обычный (зелёный): {len(only_normal)}")
    print(f" Только tiling (красный): {len(only_tiling)}")
    print(f" Оба (жёлтый): {len(both)}")

    # List the numbers that only the tiling pass found, top to bottom.
    # Sorting by the top edge from bbox_rect works for both polygon and
    # flat-rectangle boxes, unlike indexing the first polygon point.
    print("\nНовые числа от tiling OCR:")
    for item in sorted(only_tiling, key=lambda x: bbox_rect(x["bbox"])[1]):
        cx, cy = bbox_center(item["bbox"])
        print(f" {item['text']:>10} x={cx:>8.0f} y={cy:>8.0f}")
def main():
    """Command-line entry point.

    Expects three positional arguments: the PNG image, the normal OCR JSON
    and the tiling OCR JSON. With fewer arguments it prints a usage line and
    exits with status 1. The annotated image is written next to the input
    PNG as ``<stem>_ocr_compare.png``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 3:
        print("Usage: python compare_ocr.py <png> <normal_ocr.json> <tiling_ocr.json>")
        sys.exit(1)

    png, normal, tiling = (Path(arg) for arg in cli_args[:3])
    out = png.parent.joinpath(f"{png.stem}_ocr_compare.png")
    visualize_comparison(png, normal, tiling, out)


if __name__ == "__main__":
    main()