opencode/compare_ocr.py
Кирилл Блинов b5f7c6327e Add tiling OCR, preprocess and visualization tools
- tiling_ocr.py: split large drawings into overlapping tiles for better small-text recognition
- preprocess_for_ocr.py: CLAHE + unsharp mask for enhancing blueprint contrast
- visualize_dimensions.py: draw bounding boxes around detected dimension numbers
- compare_ocr.py: side-by-side visualization of normal vs tiling OCR results
- dimension_extractor.py: line-based dimension detection with pixel verification
- ocr_qwen.py: Alibaba Cloud qwen-vl-ocr client with resize and regex fallback parser
- test_qwen_ocr.py: standalone test for qwen OCR
- process_any_pdf.py: add --use-tiling flag to switch between normal and tiling OCR
2026-06-01 12:29:26 +03:00

161 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Визуализация сравнения: обычный OCR vs tiling OCR.
Рисует bbox зелёным (только обычный), красным (только tiling), жёлтым (оба).
"""
import sys
import json
import re
from pathlib import Path
from PIL import Image, ImageDraw
def load_ocr(path: Path):
    """Load OCR lines from a JSON file.

    Two document layouts are recognised:

    * a top-level ``"lines"`` key -- its value is returned as is;
    * a top-level ``"pages"`` key -- the ``"ocr_lines"`` lists of all pages
      are concatenated in page order (pages without that key contribute
      nothing).

    ``"lines"`` takes precedence when both keys are present. Any other
    document yields an empty list.
    """
    document = json.loads(path.read_text(encoding="utf-8"))

    if "lines" in document:
        return document["lines"]

    if "pages" not in document:
        return []

    # Flatten the per-page line lists into a single list.
    return [
        entry
        for page in document["pages"]
        for entry in page.get("ocr_lines", [])
    ]
def bbox_center(box):
    """Return the ``(x, y)`` centre of a bounding box.

    If the first element of ``box`` is a list, ``box`` is treated as a
    sequence of ``[x, y]`` points and the centre is the mean of all points.
    Otherwise ``box`` is read as a flat ``[x1, y1, x2, y2]`` sequence and the
    centre is the midpoint of the two corners.
    """
    if isinstance(box[0], list):
        points = box
    else:
        # Flat layout: rebuild the two corner points.
        points = [(box[0], box[1]), (box[2], box[3])]

    count = len(points)
    mean_x = sum(pt[0] for pt in points) / count
    mean_y = sum(pt[1] for pt in points) / count
    return mean_x, mean_y
def bbox_rect(box):
    """Return the axis-aligned extent ``(min_x, min_y, max_x, max_y)`` of a box.

    If the first element of ``box`` is a list, ``box`` is treated as a
    sequence of ``[x, y]`` points. Otherwise it is read as a flat
    ``[x1, y1, x2, y2]`` sequence; the corners may be given in any order.
    """
    if isinstance(box[0], list):
        points = box
    else:
        # Flat layout: rebuild the two corner points.
        points = [(box[0], box[1]), (box[2], box[3])]

    x_values = [pt[0] for pt in points]
    y_values = [pt[1] for pt in points]
    left, right = min(x_values), max(x_values)
    top, bottom = min(y_values), max(y_values)
    return left, top, right, bottom
def find_matches(text: str, list_b, iou_thresh=0.3, bbox=None):
    """Find entries of ``list_b`` that match ``text`` (and optionally a box).

    The previous implementation referenced an undefined reference box and
    raised ``TypeError`` as soon as any text matched; it also never filled
    its result list. This version implements the intended lookup.

    Args:
        text: Text to look for; compared after stripping whitespace on both
            sides.
        list_b: Iterable of OCR line dicts with ``"text"`` and ``"bbox"`` keys.
        iou_thresh: Minimum intersection-over-union required when ``bbox`` is
            given. Ignored otherwise.
        bbox: Optional reference box in either layout accepted by
            ``bbox_rect``. When omitted, matching is by text only.

    Returns:
        A list of matching entries. Without ``bbox`` they keep the order of
        ``list_b``; with ``bbox`` they are ordered by decreasing IoU.
    """
    wanted = text.strip()
    candidates = [b for b in list_b if b["text"].strip() == wanted]
    if bbox is None:
        return candidates

    ax1, ay1, ax2, ay2 = bbox_rect(bbox)
    area_a = (ax2 - ax1) * (ay2 - ay1)

    scored = []
    for cand in candidates:
        bx1, by1, bx2, by2 = bbox_rect(cand["bbox"])
        # Clamp at zero so disjoint boxes yield an empty intersection.
        inter_w = max(0, min(ax2, bx2) - max(ax1, bx1))
        inter_h = max(0, min(ay2, by2) - max(ay1, by1))
        inter = inter_w * inter_h
        union = area_a + (bx2 - bx1) * (by2 - by1) - inter
        # Two degenerate (zero-area) boxes have no meaningful overlap.
        iou = inter / union if union > 0 else 0.0
        if iou >= iou_thresh:
            scored.append((iou, cand))

    # Stable sort: ties keep their order from list_b.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [cand for _, cand in scored]
# Plain numbers only: digits with an optional single "," or "." fraction.
_NUMERIC_TEXT_RE = re.compile(r'^\d+([,.]\d+)?$')


def _group_numeric_by_text(lines):
    """Index OCR lines whose stripped text is a plain number, keyed by that text."""
    grouped = {}
    for line in lines:
        txt = line["text"].strip()
        if _NUMERIC_TEXT_RE.match(txt):
            grouped.setdefault(txt, []).append(line)
    return grouped


def _pair_by_center(n_list, t_list, max_dist):
    """Greedily pair each normal line with the nearest still-unused tiling line.

    Returns ``(pairs, unmatched_normal, unmatched_tiling)``.
    """
    pairs = []
    unmatched_n = []
    used_t = set()
    for n in n_list:
        cx_n, cy_n = bbox_center(n["bbox"])
        best = None
        best_dist = float("inf")
        for i, t in enumerate(t_list):
            if i in used_t:
                continue
            cx_t, cy_t = bbox_center(t["bbox"])
            d = ((cx_n - cx_t) ** 2 + (cy_n - cy_t) ** 2) ** 0.5
            if d < best_dist:
                best_dist = d
                best = i
        if best is not None and best_dist < max_dist:
            pairs.append((n, t_list[best]))
            used_t.add(best)
        else:
            unmatched_n.append(n)
    unmatched_t = [t for i, t in enumerate(t_list) if i not in used_t]
    return pairs, unmatched_n, unmatched_t


def _first_corner_y(box):
    """Return the y of the first corner for both supported bbox layouts."""
    return box[0][1] if isinstance(box[0], list) else box[1]


def visualize_comparison(
    png_path: Path,
    normal_ocr_path: Path,
    tiling_ocr_path: Path,
    out_path: Path,
    max_center_dist: float = 100,
):
    """Draw a comparison of normal vs tiling OCR results onto the image.

    Only lines whose stripped text is a plain number are compared. Lines with
    identical text are paired greedily by the distance between box centres.
    Boxes found only by normal OCR are outlined in green, boxes found only by
    tiling OCR in red (with their text drawn above the centre), and boxes
    found by both in yellow. The annotated image is written to ``out_path``
    and a summary is printed to stdout.

    Args:
        png_path: Source image to annotate.
        normal_ocr_path: JSON file with the normal OCR output.
        tiling_ocr_path: JSON file with the tiling OCR output.
        out_path: Destination of the annotated image.
        max_center_dist: Two equal-text boxes count as the same detection
            when their centres are closer than this many pixels.
    """
    # Work on an in-memory copy so the source file handle is released, and
    # force a colour mode: on grayscale/palette images the named outline
    # colours would otherwise be reduced to shades of grey.
    with Image.open(png_path) as src:
        if src.mode in ("RGB", "RGBA"):
            img = src.copy()
        else:
            img = src.convert("RGB")
    draw = ImageDraw.Draw(img)

    normal_by_text = _group_numeric_by_text(load_ocr(normal_ocr_path))
    tiling_by_text = _group_numeric_by_text(load_ocr(tiling_ocr_path))

    only_normal = []  # green
    only_tiling = []  # red
    both = []  # yellow
    # Sorted so that the classification (and draw) order is reproducible.
    for txt in sorted(set(normal_by_text) | set(tiling_by_text)):
        pairs, unmatched_n, unmatched_t = _pair_by_center(
            normal_by_text.get(txt, []),
            tiling_by_text.get(txt, []),
            max_center_dist,
        )
        both.extend(pairs)
        only_normal.extend(unmatched_n)
        only_tiling.extend(unmatched_t)

    for item in only_normal:
        x1, y1, x2, y2 = bbox_rect(item["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="green", width=3)
    for item in only_tiling:
        x1, y1, x2, y2 = bbox_rect(item["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
        cx, cy = bbox_center(item["bbox"])
        draw.text((cx, cy - 15), item["text"], fill="red")
    for _n, t in both:
        # For detections found by both, the tiling box is the one drawn.
        x1, y1, x2, y2 = bbox_rect(t["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="yellow", width=2)

    img.save(out_path)

    print(f"[OK] Сохранено: {out_path}")
    print(f" Только обычный (зелёный): {len(only_normal)}")
    print(f" Только tiling (красный): {len(only_tiling)}")
    print(f" Оба (жёлтый): {len(both)}")

    # List the numbers that only tiling OCR found, top to bottom. The key
    # handles flat [x1, y1, x2, y2] boxes as well as point lists.
    print("\nНовые числа от tiling OCR:")
    for item in sorted(only_tiling, key=lambda x: _first_corner_y(x["bbox"])):
        cx, cy = bbox_center(item["bbox"])
        print(f" {item['text']:>10} x={cx:>8.0f} y={cy:>8.0f}")
def main():
    """Command-line entry point.

    Expects three positional arguments: the PNG image, the normal OCR JSON
    and the tiling OCR JSON. Prints a usage line and exits with status 1 when
    fewer are given. The result is written next to the input image as
    ``<stem>_ocr_compare.png``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 3:
        print("Usage: python compare_ocr.py <png> <normal_ocr.json> <tiling_ocr.json>")
        sys.exit(1)

    # Extra arguments beyond the first three are ignored.
    png, normal, tiling = (Path(arg) for arg in cli_args[:3])
    out = png.parent / f"{png.stem}_ocr_compare.png"
    visualize_comparison(png, normal, tiling, out)


if __name__ == "__main__":
    main()