opencode/visualize_dimensions.py

55 lines
1.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Визуализация найденных размерных чисел на PNG.
Рисует bbox вокруг чисел, извлечённых из OCR.
"""
import sys
import json
import re
from pathlib import Path
from PIL import Image, ImageDraw
def visualize_dimensions(ocr_json_path: Path, png_path: Path, out_path: Path):
    """Draw red boxes around the dimension numbers found by OCR.

    Every entry of ``pages[*].ocr_lines`` in the OCR JSON is inspected. When
    the stripped ``text`` of a line is a plain number (digits, optionally
    followed by ``,`` or ``.`` and more digits) and the line carries a
    ``bbox``, the box is outlined on the image and the number is written just
    above it. The annotated image is saved to ``out_path`` and a one-line
    summary is printed.

    Args:
        ocr_json_path: UTF-8 encoded JSON file with the OCR result.
        png_path: Image to annotate.
        out_path: Destination of the annotated image.
    """
    ocr = json.loads(ocr_json_path.read_text(encoding="utf-8"))
    # Compiled once: the same pattern is applied to every OCR line.
    number_re = re.compile(r'^\d+([,.]\d+)?$')
    found = 0

    # The context manager guarantees the source file handle is released even
    # if drawing or saving fails.
    with Image.open(png_path) as src:
        # Palette, grayscale and 1-bit images cannot show a true red outline,
        # so annotate an RGB copy in those cases.
        img = src if src.mode in ("RGB", "RGBA") else src.convert("RGB")
        draw = ImageDraw.Draw(img)

        for page in ocr.get("pages", []):
            for line in page.get("ocr_lines", []):
                # Lines with a missing or null text are simply not numbers.
                txt = (line.get("text") or "").strip()
                if not number_re.match(txt):
                    continue
                bbox = line.get("bbox")
                if not bbox:
                    continue

                if isinstance(bbox[0], (list, tuple)):
                    # Corner-point form: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
                    pts = [(p[0], p[1]) for p in bbox]
                else:
                    # Flat form [x1, y1, x2, y2]: expand to the four corners.
                    pts = [(bbox[0], bbox[1]), (bbox[2], bbox[1]),
                           (bbox[2], bbox[3]), (bbox[0], bbox[3])]
                draw.polygon(pts, outline="red", width=3)

                # Label at the top-left corner, 20 px above the box, clamped
                # so that it stays on the canvas for boxes near the top edge.
                x = min(p[0] for p in pts)
                y = min(p[1] for p in pts)
                draw.text((x, max(0, y - 20)), txt, fill="red")
                found += 1

        img.save(out_path)

    print(f"[OK] Найдено {found} размерных чисел. Сохранено: {out_path}")
if __name__ == "__main__":
    # Two positional arguments are required: the OCR JSON and the image.
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python visualize_dimensions.py <ocr_json> <png>")
        sys.exit(1)

    source_png = Path(cli_args[1])
    # The annotated copy is written next to the input as "<stem>_dims.png".
    annotated_png = source_png.parent / f"{source_png.stem}_dims.png"
    visualize_dimensions(Path(cli_args[0]), source_png, annotated_png)