- tiling_ocr.py: split large drawings into overlapping tiles for better small-text recognition - preprocess_for_ocr.py: CLAHE + unsharp mask for enhancing blueprint contrast - visualize_dimensions.py: draw bounding boxes around detected dimension numbers - compare_ocr.py: side-by-side visualization of normal vs tiling OCR results - dimension_extractor.py: line-based dimension detection with pixel verification - ocr_qwen.py: Alibaba Cloud qwen-vl-ocr client with resize and regex fallback parser - test_qwen_ocr.py: standalone test for qwen OCR - process_any_pdf.py: add --use-tiling flag to switch between normal and tiling OCR
55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Визуализация найденных размерных чисел на PNG.
|
|
Рисует bbox вокруг чисел, извлечённых из OCR.
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from PIL import Image, ImageDraw
|
|
|
|
|
|
# Matches a bare dimension value: digits with an optional decimal part
# separated by a comma or a dot ("120", "12,5", "3.75").
_DIMENSION_RE = re.compile(r"\d+(?:[,.]\d+)?")

# Vertical gap, in pixels, between the top of a box and its text label.
_LABEL_OFFSET = 20


def _bbox_to_points(bbox):
    """Return polygon vertices for *bbox*, or an empty list if it is unusable.

    Two layouts are accepted: a list of ``[x, y]`` points, or a flat
    ``[x1, y1, x2, y2]`` rectangle that is expanded to its four corners.
    """
    if not bbox or not isinstance(bbox, (list, tuple)):
        return []
    if isinstance(bbox[0], (list, tuple)):
        return [(p[0], p[1]) for p in bbox]
    if len(bbox) < 4:
        # A flat box needs two corners; skip it instead of raising IndexError.
        return []
    x1, y1, x2, y2 = bbox[:4]
    return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]


def visualize_dimensions(ocr_json_path: Path, png_path: Path, out_path: Path) -> None:
    """Draw red outlines around the numeric OCR lines of a PNG and save the result.

    The OCR JSON is read as ``{"pages": [{"ocr_lines": [{"text": ..., "bbox": ...}]}]}``.
    Every line whose stripped text is a plain number (optionally with a ``,`` or
    ``.`` decimal part) and which carries a usable bbox is outlined and labelled
    with its text.

    Args:
        ocr_json_path: UTF-8 JSON file with the OCR output.
        png_path: Image the OCR coordinates refer to.
        out_path: Where the annotated image is written.

    Lines without a string ``text`` or without a usable ``bbox`` are skipped.
    A summary with the number of outlined values is printed to stdout.
    """
    ocr = json.loads(ocr_json_path.read_text(encoding="utf-8"))

    found = 0
    # The context manager releases the source file handle even if drawing fails.
    with Image.open(png_path) as src:
        # On single-channel/palette images "red" cannot be rendered as a true
        # red (it is mapped to a gray level or a palette slot), so promote
        # those modes to RGB before drawing.
        img = src.convert("RGB") if src.mode in ("1", "L", "P") else src
        draw = ImageDraw.Draw(img)

        for page in ocr.get("pages", []):
            for line in page.get("ocr_lines", []):
                raw_text = line.get("text")
                if not isinstance(raw_text, str):
                    continue
                txt = raw_text.strip()
                if not _DIMENSION_RE.fullmatch(txt):
                    continue

                pts = _bbox_to_points(line.get("bbox"))
                if not pts:
                    continue

                draw.polygon(pts, outline="red", width=3)

                # Label above the top-left corner, clamped so that boxes near
                # the top edge do not get their label drawn off-canvas.
                x = min(p[0] for p in pts)
                y = min(p[1] for p in pts)
                draw.text((x, max(0, y - _LABEL_OFFSET)), txt, fill="red")
                found += 1

        img.save(out_path)

    print(f"[OK] Найдено {found} размерных чисел. Сохранено: {out_path}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Two positional arguments are required: the OCR JSON and the source PNG.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python visualize_dimensions.py <ocr_json> <png>")
        sys.exit(1)

    ocr_json, png = (Path(arg) for arg in cli_args[:2])

    # The annotated copy is written next to the input as "<stem>_dims.png".
    out = png.parent / f"{png.stem}_dims.png"

    visualize_dimensions(ocr_json, png, out)
|