opencode/layout_detector.py
Кирилл Блинов feeb02242b Add layout detection and multi-element extraction
- layout_detector.py: zone classification (drawing/table/title_block/notes) using line detection and text density analysis
- multi_element_extractor.py: extract dimensions, positions (П-1, X-1), GOST refs, steel grades, elevations, beam labels per zone
2026-06-01 12:29:32 +03:00

299 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Layout Detector — splits a drawing page into zones.
Zones:
- "drawing" — schematics, views, sections (lines + text, sparse)
- "table" — tables (dense lines forming a grid)
- "title_block" — title block (bottom-right corner or bottom of the page)
- "notes" — notes, text blocks
- "legend" — legend / symbol key (listed for completeness; classify_regions does not emit this zone yet)
Algorithm:
1. Finds all lines on the page
2. Finds rectangles = table candidates
3. Analyses OCR text density
4. Classifies the regions
"""
import sys
import json
from pathlib import Path
from typing import List, Dict, Tuple
import cv2
import numpy as np
from PIL import Image
def _row_runs(mask: np.ndarray, min_length: int):
    """Return runs of True values found along each row of a 2-D boolean mask.

    Returns three parallel lists ``(rows, starts, ends)`` where ``ends`` is
    inclusive. Only runs of at least ``min_length`` pixels are kept. Runs are
    ordered by row, then by start position.
    """
    # Pad one empty column on each side so that runs touching the image border
    # still produce a rising and a falling edge.
    padded = np.zeros((mask.shape[0], mask.shape[1] + 2), dtype=np.int8)
    padded[:, 1:-1] = mask
    steps = np.diff(padded, axis=1)
    # np.nonzero scans in row-major order and edges alternate within a row,
    # so the k-th rising edge always pairs with the k-th falling edge.
    rows, starts = np.nonzero(steps == 1)
    stops = np.nonzero(steps == -1)[1]  # exclusive end of each run
    keep = (stops - starts) >= min_length
    return rows[keep].tolist(), starts[keep].tolist(), (stops[keep] - 1).tolist()


def find_all_lines(img_gray: np.ndarray, min_length: int = 40):
    """Find straight horizontal and vertical line segments in a grayscale image.

    A pixel is treated as ink when its value is <= 180, which selects the same
    pixels as an inverse binary threshold at 180. Every maximal run of ink
    pixels along a row (or a column) that is at least ``min_length`` pixels
    long becomes one segment.

    Args:
        img_gray: 2-D grayscale image.
        min_length: Minimum run length, in pixels, for a segment to be kept.

    Returns:
        A list of tuples ``(kind, x1, y1, x2, y2)`` with inclusive end points
        given as plain ``int`` values. ``kind`` is ``"h"`` or ``"v"``.
        Horizontal segments come first (ordered by row, then x), followed by
        vertical segments (ordered by column, then y).

    Raises:
        ValueError: If ``img_gray`` is not two-dimensional.
    """
    img_gray = np.asarray(img_gray)
    if img_gray.ndim != 2:
        raise ValueError("find_all_lines expects a 2-D grayscale image")

    ink = img_gray <= 180

    # Run-length detection is vectorised: a per-pixel Python loop over a
    # full-resolution scan is prohibitively slow.
    lines = [("h", x1, y, x2, y)
             for y, x1, x2 in zip(*_row_runs(ink, min_length))]
    # Transposing turns columns into rows, so the same helper finds verticals.
    lines += [("v", x, y1, x, y2)
              for x, y1, y2 in zip(*_row_runs(ink.T, min_length))]
    return lines
def find_rectangles(lines: List[Tuple], min_size: int = 100) -> List[Dict]:
    """Find rectangles bounded by horizontal and vertical line segments.

    Two horizontal segments form the top and bottom edges when their shared
    x-interval is at least ``min_size`` wide. The candidate is accepted only
    if a vertical segment at the left end and another at the right end of
    that interval each reach from the top edge down to the bottom edge.
    Rectangles lying strictly inside another detected rectangle are dropped.

    Args:
        lines: Segments as ``(kind, x1, y1, x2, y2)`` with ``kind`` being
            ``"h"`` or ``"v"``.
        min_size: Minimum width of the shared x-interval, in pixels.

    Returns:
        A list of dicts with keys ``"x"``, ``"y"``, ``"w"`` and ``"h"``.
    """
    from collections import defaultdict

    # Index horizontals by their y and verticals by their x.
    h_by_y = defaultdict(list)
    v_by_x = defaultdict(list)
    for line in lines:
        kind = line[0]
        x1, y1, x2, y2 = line[1:5]
        if kind == "h":
            h_by_y[y1].append((x1, x2))
        elif kind == "v":
            v_by_x[x1].append((y1, y2))

    def has_side(x, y_top, y_bottom):
        # The side must span both edges. A segment that merely overlaps the
        # y-range does not close the rectangle and would create phantom
        # rectangles between unrelated horizontal lines.
        return any(y1 <= y_top and y2 >= y_bottom
                   for y1, y2 in v_by_x.get(x, ()))

    rects = []
    y_vals = sorted(h_by_y)
    for i, y_top in enumerate(y_vals):
        for y_bottom in y_vals[i + 1:]:
            for x1_a, x2_a in h_by_y[y_top]:
                for x1_b, x2_b in h_by_y[y_bottom]:
                    # x-interval shared by the two horizontal segments.
                    x_left = max(x1_a, x1_b)
                    x_right = min(x2_a, x2_b)
                    if x_right - x_left < min_size:
                        continue
                    if (has_side(x_left, y_top, y_bottom)
                            and has_side(x_right, y_top, y_bottom)):
                        rects.append({
                            "x": x_left, "y": y_top,
                            "w": x_right - x_left, "h": y_bottom - y_top,
                        })

    def is_nested(r):
        # Strictly inside some other rectangle on all four sides.
        return any(
            r is not other
            and r["x"] > other["x"] and r["y"] > other["y"]
            and r["x"] + r["w"] < other["x"] + other["w"]
            and r["y"] + r["h"] < other["y"] + other["h"]
            for other in rects
        )

    # Keep only the outermost rectangles.
    return [r for r in rects if not is_nested(r)]
def classify_regions(rects: List[Dict], ocr_lines: List[Dict], img_w: int, img_h: int) -> List[Dict]:
    """Classify page regions into table, drawing, title block and notes.

    Args:
        rects: Rectangles as dicts with keys ``"x"``, ``"y"``, ``"w"``, ``"h"``.
        ocr_lines: OCR entries as dicts with ``"text"``, centre ``"cx"``/``"cy"``
            and, optionally, the left edge ``"x1"``.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A list of region dicts with ``"type"``, ``"bbox"`` as
        ``[x1, y1, x2, y2]`` and ``"text_count"``. Table regions also carry
        ``"density"``. All boxes derived from text positions are clamped to
        the image bounds.
    """
    def clamp(x1, y1, x2, y2):
        # Keep padded boxes inside the image so they can be used for cropping.
        return [max(0, x1), max(0, y1), min(img_w, x2), min(img_h, y2)]

    regions = []

    # 1. Tables: rectangles containing many OCR lines per unit of area.
    for r in rects:
        area = r["w"] * r["h"]
        if area <= 0:
            # A degenerate rectangle has no density; skip it instead of
            # dividing by zero.
            continue
        texts_in = [t for t in ocr_lines
                    if r["x"] <= t["cx"] <= r["x"] + r["w"]
                    and r["y"] <= t["cy"] <= r["y"] + r["h"]]
        density = len(texts_in) / (area / 1000000)  # OCR lines per megapixel
        if density > 20:
            regions.append({
                "type": "table",
                "bbox": [r["x"], r["y"], r["x"] + r["w"], r["y"] + r["h"]],
                "density": density,
                "text_count": len(texts_in)
            })

    # 2. Drawing: a simplification. Take the bounding box of every OCR line
    #    in the upper 75% and left 60% of the page.
    drawing_texts = [t for t in ocr_lines
                     if t["cy"] < img_h * 0.75 and t["cx"] < img_w * 0.6]
    if drawing_texts:
        xs = [t["cx"] for t in drawing_texts]
        ys = [t["cy"] for t in drawing_texts]
        # Pad horizontally by the widest text half-width (100 px when no entry
        # carries "x1") and vertically by 100 px.
        half_widths = [t["cx"] - t["x1"] for t in drawing_texts if "x1" in t]
        pad_x = max(half_widths) if half_widths else 100
        regions.append({
            "type": "drawing",
            "bbox": clamp(min(xs) - pad_x, min(ys) - 100,
                          max(xs) + pad_x, max(ys) + 100),
            "text_count": len(drawing_texts)
        })

    # 3. Title block: OCR lines in the bottom 15% of the page.
    title_texts = [t for t in ocr_lines if t["cy"] > img_h * 0.85]
    if title_texts:
        xs = [t["cx"] for t in title_texts]
        ys = [t["cy"] for t in title_texts]
        regions.append({
            "type": "title_block",
            "bbox": clamp(min(xs) - 50, min(ys) - 50,
                          max(xs) + 50, max(ys) + 50),
            "text_count": len(title_texts)
        })

    # 4. Notes: OCR lines containing one of the keywords below.
    note_keywords = ["примечание", "общие указания", "границы", "размеры"]
    note_texts = [t for t in ocr_lines
                  if any(kw in t["text"].lower() for kw in note_keywords)]
    if note_texts:
        xs = [t["cx"] for t in note_texts]
        ys = [t["cy"] for t in note_texts]
        regions.append({
            "type": "notes",
            "bbox": clamp(min(xs) - 100, min(ys) - 100,
                          max(xs) + 100, max(ys) + 100),
            "text_count": len(note_texts)
        })

    return regions
def detect_layout(png_path: Path, ocr_path: Path) -> Dict:
    """Run layout detection for one page image and its OCR result.

    Args:
        png_path: Path to the page image.
        ocr_path: Path to a UTF-8 JSON file whose ``"pages"`` entries hold
            ``"ocr_lines"``, each with ``"text"`` and ``"bbox"``. A bbox is
            either a list of ``[x, y]`` points or a flat ``[x1, y1, x2, y2]``.

    Returns:
        A dict with ``"image_size"`` as ``[w, h]``, the classified
        ``"regions"``, the detected ``"rectangles"`` and ``"line_count"``.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(str(png_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising; fail here with a message naming the path.
        raise OSError(f"Cannot read image: {png_path}")
    h, w = img.shape[:2]

    # Load the OCR result.
    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))

    # Collect every OCR line together with its centre and bounding box.
    all_texts = []
    for page in ocr.get("pages", []):
        for line in page.get("ocr_lines", []):
            bbox = line.get("bbox", [])
            if not bbox:
                continue
            if isinstance(bbox[0], list):
                # Polygon form: a list of [x, y] points.
                xs = [p[0] for p in bbox]
                ys = [p[1] for p in bbox]
            else:
                # Flat form: [x1, y1, x2, y2].
                xs = [bbox[0], bbox[2]]
                ys = [bbox[1], bbox[3]]
            all_texts.append({
                "text": line["text"],
                "cx": sum(xs)/len(xs),
                "cy": sum(ys)/len(ys),
                "x1": min(xs), "y1": min(ys),
                "x2": max(xs), "y2": max(ys),
                "bbox": bbox
            })

    # Detect line segments.
    lines = find_all_lines(img)
    print(f"[INFO] Найдено {len(lines)} линий")

    # Detect rectangles.
    rects = find_rectangles(lines)
    print(f"[INFO] Найдено {len(rects)} прямоугольников")

    # Classify regions.
    regions = classify_regions(rects, all_texts, w, h)
    print(f"[INFO] Классифицировано {len(regions)} регионов")
    for r in regions:
        print(f" {r['type']}: bbox={r['bbox']}, texts={r.get('text_count', 0)}")

    return {
        "image_size": [w, h],
        "regions": regions,
        "rectangles": rects,
        "line_count": len(lines)
    }
def visualize_layout(png_path: Path, layout: Dict, out_path: Path):
    """Draw the detected regions on the page image and save the result.

    Args:
        png_path: Path to the source page image.
        layout: Layout dict whose ``"regions"`` entries each have a ``"type"``
            and a ``"bbox"`` given as ``[x1, y1, x2, y2]``.
        out_path: Where the annotated image is written.
    """
    # Imported here because the module imports ImageDraw only under the
    # __main__ guard; without this, calling the function from an importing
    # module raises NameError.
    from PIL import ImageDraw

    colors = {
        "table": "blue",
        "drawing": "green",
        "title_block": "purple",
        "notes": "orange"
    }

    # The context manager closes the file handle; copy()/convert() load the
    # pixel data first so the image stays usable afterwards.
    with Image.open(png_path) as src:
        if src.mode in ("RGB", "RGBA"):
            img = src.copy()
        else:
            # On grayscale/bilevel/palette pages colour names are mapped into
            # the image mode, so outlines would not keep their colours.
            img = src.convert("RGB")

    draw = ImageDraw.Draw(img)
    for region in layout["regions"]:
        x1, y1, x2, y2 = region["bbox"]
        # Unknown region types are outlined in red.
        color = colors.get(region["type"], "red")
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
        draw.text((x1+5, y1+5), region["type"], fill=color)

    img.save(out_path)
    print(f"[OK] Layout visualization: {out_path}")
def main():
    """Command-line entry point: detect the layout, then write JSON and a preview.

    Expects two arguments, the page PNG and the OCR JSON. Writes
    ``layout.json`` and ``<stem>_layout.png`` next to the PNG.
    """
    args = sys.argv[1:]
    if len(args) < 2:
        print("Usage: python layout_detector.py <png> <ocr_json>")
        sys.exit(1)

    png, ocr = Path(args[0]), Path(args[1])
    target_dir = png.parent
    out_json = target_dir / "layout.json"
    out_png = target_dir / f"{png.stem}_layout.png"

    layout = detect_layout(png, ocr)

    with out_json.open("w", encoding="utf-8") as fh:
        json.dump(layout, fh, ensure_ascii=False, indent=2)
    print(f"[OK] Layout JSON: {out_json}")

    visualize_layout(png, layout, out_png)
if __name__ == "__main__":
    # ImageDraw is bound as a module global here, before main() runs, so the
    # visualisation step can resolve it when the file is executed as a script.
    from PIL import ImageDraw

    main()