- tiling_ocr.py: split large drawings into overlapping tiles for better small-text recognition
- preprocess_for_ocr.py: CLAHE + unsharp mask for enhancing blueprint contrast
- visualize_dimensions.py: draw bounding boxes around detected dimension numbers
- compare_ocr.py: side-by-side visualization of normal vs tiling OCR results
- dimension_extractor.py: line-based dimension detection with pixel verification
- ocr_qwen.py: Alibaba Cloud qwen-vl-ocr client with resize and regex fallback parser
- test_qwen_ocr.py: standalone test for qwen OCR
- process_any_pdf.py: add --use-tiling flag to switch between normal and tiling OCR
161 lines
5.1 KiB
Python
161 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Визуализация сравнения: обычный OCR vs tiling OCR.
|
||
Рисует bbox зелёным (только обычный), красным (только tiling), жёлтым (оба).
|
||
"""
|
||
|
||
import sys
|
||
import json
|
||
import re
|
||
from pathlib import Path
|
||
from PIL import Image, ImageDraw
|
||
|
||
|
||
def load_ocr(path: Path):
    """Read an OCR result JSON file and return its recognized lines.

    Two layouts are understood:

    * a top-level ``"lines"`` entry, which is returned unchanged;
    * a top-level ``"pages"`` list, whose per-page ``"ocr_lines"`` lists
      are concatenated in page order (pages without that key add nothing).

    Any other layout yields an empty list.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))

    if "lines" in payload:
        return payload["lines"]
    if "pages" not in payload:
        return []

    # Flatten the per-page line lists into one list, preserving page order.
    return [
        entry
        for page in payload["pages"]
        for entry in page.get("ocr_lines", [])
    ]
|
||
|
||
|
||
def bbox_center(box):
    """Return the ``(x, y)`` center of an OCR bounding box.

    Args:
        box: Either a polygon given as a sequence of ``[x, y]`` points, or a
            flat ``[x1, y1, x2, y2]`` rectangle.

    Returns:
        A ``(cx, cy)`` tuple: the mean of the polygon's vertices, or the
        midpoint of the two corners of a flat rectangle.
    """
    # Points parsed from JSON are lists, but boxes built in Python code are
    # often tuples; treating only ``list`` as a point would send tuple-based
    # polygons down the flat-rectangle branch and fail there.
    if isinstance(box[0], (list, tuple)):
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return sum(xs) / len(xs), sum(ys) / len(ys)
|
||
|
||
|
||
def bbox_rect(box):
    """Return the axis-aligned bounds ``(x_min, y_min, x_max, y_max)`` of a box.

    Args:
        box: Either a polygon given as a sequence of ``[x, y]`` points, or a
            flat ``[x1, y1, x2, y2]`` rectangle (corners in any order).

    Returns:
        A 4-tuple ``(x_min, y_min, x_max, y_max)``.
    """
    # Accept tuple points as well as list points; with a list-only check a
    # polygon of tuples would be misread as a flat rectangle and the result
    # would contain point tuples instead of coordinates.
    if isinstance(box[0], (list, tuple)):
        xs = [point[0] for point in box]
        ys = [point[1] for point in box]
    else:
        xs = [box[0], box[2]]
        ys = [box[1], box[3]]
    return min(xs), min(ys), max(xs), max(ys)
|
||
|
||
|
||
def find_matches(text: str, list_b, iou_thresh=0.3, bbox=None):
    """Find the entries of ``list_b`` that match ``text`` (and optionally a box).

    Args:
        text: Text to look for; compared after stripping surrounding
            whitespace on both sides.
        list_b: Iterable of OCR line dicts with a ``"text"`` key (and a
            ``"bbox"`` key when ``bbox`` is given).
        iou_thresh: Minimum intersection-over-union a candidate must reach.
            Only applied when ``bbox`` is given.
        bbox: Optional reference bounding box in any format ``bbox_rect``
            accepts. When omitted, matching is by text only.

    Returns:
        A list of matching entries. Without ``bbox`` they keep the order of
        ``list_b``; with ``bbox`` they are ordered by descending IoU.
    """
    wanted = text.strip()
    candidates = [b for b in list_b if b["text"].strip() == wanted]

    # Without a reference box no overlap can be computed, so the text match
    # is the whole answer.
    if bbox is None:
        return candidates

    ax1, ay1, ax2, ay2 = bbox_rect(bbox)
    area_a = (ax2 - ax1) * (ay2 - ay1)

    scored = []
    for cand in candidates:
        bx1, by1, bx2, by2 = bbox_rect(cand["bbox"])
        inter_w = min(ax2, bx2) - max(ax1, bx1)
        inter_h = min(ay2, by2) - max(ay1, by1)
        if inter_w > 0 and inter_h > 0:
            inter = inter_w * inter_h
            union = area_a + (bx2 - bx1) * (by2 - by1) - inter
            iou = inter / union
        else:
            # Disjoint or merely touching boxes have no overlap.
            iou = 0.0
        if iou >= iou_thresh:
            scored.append((iou, cand))

    # The sort is stable, so equal-IoU candidates keep their list_b order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [cand for _, cand in scored]
|
||
|
||
|
||
def _numeric_by_text(lines):
    """Group OCR lines whose stripped text is a plain number, keyed by that text."""
    number = re.compile(r'^\d+([,.]\d+)?$')
    grouped = {}
    for line in lines:
        txt = line["text"].strip()
        if number.match(txt):
            grouped.setdefault(txt, []).append(line)
    return grouped


def _pair_by_center(n_list, t_list, max_dist):
    """Greedily pair each normal line with its nearest unused tiling line.

    Returns ``(pairs, unmatched_normal, unmatched_tiling)``; a pair is only
    formed when the two box centers are closer than ``max_dist``.
    """
    pairs = []
    unmatched_normal = []
    used_t = set()

    for n in n_list:
        cx_n, cy_n = bbox_center(n["bbox"])
        best = None
        best_dist = float("inf")
        for i, t in enumerate(t_list):
            if i in used_t:
                continue
            cx_t, cy_t = bbox_center(t["bbox"])
            d = ((cx_n - cx_t) ** 2 + (cy_n - cy_t) ** 2) ** 0.5
            if d < best_dist:
                best_dist = d
                best = i

        if best is not None and best_dist < max_dist:
            pairs.append((n, t_list[best]))
            used_t.add(best)
        else:
            unmatched_normal.append(n)

    unmatched_tiling = [t for i, t in enumerate(t_list) if i not in used_t]
    return pairs, unmatched_normal, unmatched_tiling


def visualize_comparison(png_path: Path, normal_ocr_path: Path, tiling_ocr_path: Path, out_path: Path, max_dist: float = 100):
    """Draw a comparison of normal vs tiling OCR detections onto an image.

    Only lines whose text is a plain number (optionally with a ``,`` or ``.``
    decimal part) are compared. Lines with identical text are paired when
    their box centers are closer than ``max_dist``. Boxes are outlined in
    green (normal OCR only), red (tiling OCR only, also labelled with the
    text) or yellow (found by both; the tiling box is drawn).

    Args:
        png_path: Image the OCR was run on.
        normal_ocr_path: JSON file with the normal OCR result.
        tiling_ocr_path: JSON file with the tiling OCR result.
        out_path: Where the annotated image is saved.
        max_dist: Maximum center-to-center distance, in pixels, for two
            detections with the same text to count as the same number.

    Side effects:
        Writes the annotated image to ``out_path`` and prints a summary plus
        the list of numbers found only by tiling OCR.
    """
    # Close the source file promptly, and make sure the canvas can show
    # distinct colors: on grayscale/palette images green, red and yellow
    # would otherwise be reduced to shades of gray or palette entries.
    with Image.open(png_path) as src:
        if src.mode in ("RGB", "RGBA"):
            img = src.copy()
        else:
            img = src.convert("RGB")
    draw = ImageDraw.Draw(img)

    normal_by_text = _numeric_by_text(load_ocr(normal_ocr_path))
    tiling_by_text = _numeric_by_text(load_ocr(tiling_ocr_path))

    only_normal = []  # green
    only_tiling = []  # red
    both = []         # yellow

    # Sorted so that draw and print order do not depend on set iteration
    # order, which varies between runs for strings.
    for txt in sorted(set(normal_by_text) | set(tiling_by_text)):
        pairs, unmatched_n, unmatched_t = _pair_by_center(
            normal_by_text.get(txt, []),
            tiling_by_text.get(txt, []),
            max_dist,
        )
        both.extend(pairs)
        only_normal.extend(unmatched_n)
        only_tiling.extend(unmatched_t)

    for item in only_normal:
        x1, y1, x2, y2 = bbox_rect(item["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="green", width=3)

    for item in only_tiling:
        x1, y1, x2, y2 = bbox_rect(item["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
        cx, cy = bbox_center(item["bbox"])
        draw.text((cx, cy - 15), item["text"], fill="red")

    for _, t in both:
        # Draw the tiling bbox for numbers found by both passes.
        x1, y1, x2, y2 = bbox_rect(t["bbox"])
        draw.rectangle([x1, y1, x2, y2], outline="yellow", width=2)

    img.save(out_path)
    print(f"[OK] Сохранено: {out_path}")
    print(f" Только обычный (зелёный): {len(only_normal)}")
    print(f" Только tiling (красный): {len(only_tiling)}")
    print(f" Оба (жёлтый): {len(both)}")

    # List the numbers that only tiling OCR found, top to bottom. The key
    # goes through bbox_rect so flat [x1, y1, x2, y2] boxes work as well as
    # polygons (indexing bbox[0][1] directly fails on the flat format).
    print("\nНовые числа от tiling OCR:")
    for item in sorted(only_tiling, key=lambda x: bbox_rect(x["bbox"])[1]):
        cx, cy = bbox_center(item["bbox"])
        print(f" {item['text']:>10} x={cx:>8.0f} y={cy:>8.0f}")
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Expects three positional arguments: the drawing image, the normal OCR
    JSON and the tiling OCR JSON. With fewer arguments it prints a usage
    line and exits with status 1. The annotated image is written next to the
    input image as ``<stem>_ocr_compare.png``.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 3:
        print("Usage: python compare_ocr.py <png> <normal_ocr.json> <tiling_ocr.json>")
        sys.exit(1)

    png, normal, tiling = (Path(arg) for arg in cli_args[:3])
    out_name = f"{png.stem}_ocr_compare.png"
    out = png.parent / out_name

    visualize_comparison(png, normal, tiling, out)
|
||
|
||
|
||
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|