opencode/visualize_dimensions.py
Кирилл Блинов b5f7c6327e Add tiling OCR, preprocess and visualization tools
- tiling_ocr.py: split large drawings into overlapping tiles for better small-text recognition
- preprocess_for_ocr.py: CLAHE + unsharp mask for enhancing blueprint contrast
- visualize_dimensions.py: draw bounding boxes around detected dimension numbers
- compare_ocr.py: side-by-side visualization of normal vs tiling OCR results
- dimension_extractor.py: line-based dimension detection with pixel verification
- ocr_qwen.py: Alibaba Cloud qwen-vl-ocr client with resize and regex fallback parser
- test_qwen_ocr.py: standalone test for qwen OCR
- process_any_pdf.py: add --use-tiling flag to switch between normal and tiling OCR
2026-06-01 12:29:26 +03:00

55 lines
1.9 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Визуализация найденных размерных чисел на PNG.
Рисует bbox вокруг чисел, извлечённых из OCR.
"""
import sys
import json
import re
from pathlib import Path
from PIL import Image, ImageDraw
# A bare dimension value: digits with an optional fractional part separated
# by "." or "," (for example "120", "12.5" or "3,75").
_DIMENSION_RE = re.compile(r'\d+([,.]\d+)?')


def _bbox_to_points(bbox):
    """Return polygon vertices for an OCR bbox, or None if it is unusable."""
    if not isinstance(bbox, (list, tuple)) or not bbox:
        return None
    try:
        if isinstance(bbox[0], (list, tuple)):
            # Quadrilateral form: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
            return [(p[0], p[1]) for p in bbox]
        # Flat form [x1, y1, x2, y2]: expand to the four rectangle corners.
        left, top, right, bottom = bbox[:4]
    except (IndexError, TypeError, ValueError):
        # Too few coordinates or a non-sequence point: skip this record
        # instead of aborting the whole visualization.
        return None
    return [(left, top), (right, top), (right, bottom), (left, bottom)]


def visualize_dimensions(ocr_json_path: Path, png_path: Path, out_path: Path) -> int:
    """Outline and label the dimension numbers from an OCR result on a PNG.

    The OCR JSON is read as ``{"pages": [{"ocr_lines": [{"text": ...,
    "bbox": ...}]}]}``. Every line whose stripped text is a plain number
    (integer or decimal with "." or ",") and that carries a usable bbox is
    outlined in red and labelled with its text. Lines from all pages are
    drawn onto the same image.

    Args:
        ocr_json_path: Path to the UTF-8 encoded OCR result JSON.
        png_path: Path to the image the OCR coordinates refer to.
        out_path: Where the annotated image is written.

    Returns:
        The number of dimension values that were drawn.
    """
    ocr = json.loads(ocr_json_path.read_text(encoding="utf-8"))

    # Close the source file promptly; all drawing happens on an in-memory copy.
    with Image.open(png_path) as src:
        if src.mode in ("RGB", "RGBA"):
            img = src.copy()
        else:
            # Grayscale, palette and bilevel images cannot represent the red
            # ink faithfully, so promote them to RGB before drawing.
            img = src.convert("RGB")

    draw = ImageDraw.Draw(img)
    found = 0
    for page in ocr.get("pages") or []:
        for line in page.get("ocr_lines") or []:
            raw_text = line.get("text")
            if not isinstance(raw_text, str):
                continue
            txt = raw_text.strip()
            if not _DIMENSION_RE.fullmatch(txt):
                continue
            pts = _bbox_to_points(line.get("bbox"))
            if not pts:
                continue
            draw.polygon(pts, outline="red", width=3)
            # Caption sits just above the top-left corner of the box; clamp
            # it so boxes near the top edge still get a visible caption.
            x = min(p[0] for p in pts)
            y = min(p[1] for p in pts)
            draw.text((x, max(0, y - 20)), txt, fill="red")
            found += 1

    img.save(out_path)
    print(f"[OK] Найдено {found} размерных чисел. Сохранено: {out_path}")
    return found
if __name__ == "__main__":
    # Two positional arguments are required: the OCR JSON and the PNG.
    # Anything after them is ignored.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python visualize_dimensions.py <ocr_json> <png>")
        sys.exit(1)
    json_file = Path(cli_args[0])
    source_png = Path(cli_args[1])
    # The annotated copy is written next to the input as "<stem>_dims.png".
    annotated_png = source_png.parent / f"{source_png.stem}_dims.png"
    visualize_dimensions(json_file, source_png, annotated_png)