- layout_detector.py: zone classification (drawing/table/title_block/notes) using line detection and text density analysis - multi_element_extractor.py: extract dimensions, positions (П-1, X-1), GOST refs, steel grades, elevations, beam labels per zone
299 lines
11 KiB
Python
299 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Layout Detector — splits a drawing page into zones.

Zones:
- "drawing" — schemes, views, sections (lines + text, sparse)
- "table" — tables (dense lines forming a grid)
- "title_block" — title block (bottom-right corner or bottom of the page)
- "notes" — notes, text blocks
- "legend" — legend / symbol key (listed here; the classification code
  visible in this file does not emit it)

Algorithm:
1. Finds all lines on the page
2. Finds rectangles (table candidates)
3. Analyses the density of the OCR text
4. Classifies the regions
|
||
"""
|
||
|
||
import sys
|
||
import json
|
||
from pathlib import Path
|
||
from typing import List, Dict, Tuple
|
||
import cv2
|
||
import numpy as np
|
||
from PIL import Image
|
||
|
||
|
||
def _find_runs(mask: np.ndarray, min_length: int):
    """Locate runs of True values along axis 1 of a 2-D boolean mask.

    Returns three parallel lists of plain ints: the row index of each run,
    its first column and its last column (inclusive). Runs shorter than
    ``min_length`` are dropped. Runs are ordered by row, then by start column.
    """
    rows, cols = mask.shape
    # Pad one empty column on each side so runs touching the border still
    # produce both a rising (+1) and a falling (-1) edge in the difference.
    padded = np.zeros((rows, cols + 2), dtype=np.int8)
    padded[:, 1:-1] = mask
    steps = np.diff(padded, axis=1)

    # np.nonzero scans in row-major order; within a row starts and stops
    # alternate, so the k-th start pairs with the k-th stop.
    run_rows, starts = np.nonzero(steps == 1)
    stops = np.nonzero(steps == -1)[1]  # exclusive end of every run

    keep = (stops - starts) >= min_length
    return (
        run_rows[keep].tolist(),
        starts[keep].tolist(),
        (stops[keep] - 1).tolist(),
    )


def find_all_lines(img_gray: np.ndarray, min_length: int = 40) -> List[Tuple]:
    """Find straight horizontal and vertical line segments in a grayscale image.

    A pixel counts as ink when its value is <= 180. For 8-bit input this is
    the same mask as an inverse binary threshold at 180 followed by a
    ``> 128`` test. Every maximal run of ink pixels along a row or a column
    that is at least ``min_length`` pixels long is reported as a line.

    The run detection is vectorised with NumPy: visiting every pixel from
    Python is prohibitively slow on full-size scanned pages.

    Args:
        img_gray: 2-D grayscale image array.
        min_length: Minimum run length in pixels for a run to be reported.

    Returns:
        A list of ``(kind, x1, y1, x2, y2)`` tuples with inclusive end
        coordinates given as plain Python ints (so they stay JSON
        serialisable). ``kind`` is ``"h"`` or ``"v"``. Horizontal lines come
        first, ordered by row and then start column; vertical lines follow,
        ordered by column and then start row.

    Raises:
        ValueError: If ``img_gray`` is not two-dimensional.
    """
    ink = np.asarray(img_gray) <= 180

    # Horizontal runs: scan the mask row by row.
    lines = [
        ("h", x1, y, x2, y)
        for y, x1, x2 in zip(*_find_runs(ink, min_length))
    ]
    # Vertical runs: the same scan on the transposed mask, so the "row"
    # index returned by the helper is the x coordinate.
    lines.extend(
        ("v", x, y1, x, y2)
        for x, y1, y2 in zip(*_find_runs(ink.T, min_length))
    )
    return lines
|
||
|
||
|
||
def find_rectangles(lines: List[Tuple], min_size: int = 100) -> List[Dict]:
    """Build rectangles from pairs of horizontal lines closed by vertical lines.

    Args:
        lines: ``(kind, x1, y1, x2, y2)`` tuples where ``kind`` is ``"h"``
            or ``"v"``.
        min_size: Minimum width of the shared X interval of the two
            horizontal lines for a rectangle to be considered.

    Returns:
        A list of ``{"x", "y", "w", "h"}`` dicts. A rectangle that lies
        strictly inside another found rectangle is dropped.
    """
    from collections import defaultdict

    # Index the segments: row -> [(x_start, x_end)], column -> [(y_start, y_end)].
    rows = defaultdict(list)
    columns = defaultdict(list)
    for seg in lines:
        if seg[0] == "h":
            rows[seg[2]].append((seg[1], seg[3]))
        elif seg[0] == "v":
            columns[seg[1]].append((seg[2], seg[4]))

    def edge_at(x, top, bottom):
        # A vertical line located exactly at x that overlaps [top, bottom].
        return any(
            y_end >= top and y_start <= bottom
            for y_start, y_end in columns.get(x, [])
        )

    candidates = []
    levels = sorted(rows)
    for idx, top in enumerate(levels):
        for bottom in levels[idx + 1:]:
            for top_l, top_r in rows[top]:
                for bot_l, bot_r in rows[bottom]:
                    # Shared X interval of the upper and lower line.
                    left = max(top_l, bot_l)
                    right = min(top_r, bot_r)
                    if (
                        right - left >= min_size
                        and edge_at(left, top, bottom)
                        and edge_at(right, top, bottom)
                    ):
                        candidates.append({
                            "x": left, "y": top,
                            "w": right - left, "h": bottom - top
                        })

    def strictly_inside(inner, outer):
        return (
            inner["x"] > outer["x"]
            and inner["y"] > outer["y"]
            and inner["x"] + inner["w"] < outer["x"] + outer["w"]
            and inner["y"] + inner["h"] < outer["y"] + outer["h"]
        )

    # Keep only rectangles that are not nested inside another one.
    return [
        rect for rect in candidates
        if not any(
            other is not rect and strictly_inside(rect, other)
            for other in candidates
        )
    ]
|
||
|
||
|
||
def _clamped_bbox(texts: List[Dict], pad_x, pad_y, img_w: int, img_h: int) -> List:
    """Return the bbox around the text centres, padded and clipped to the image."""
    xs = [t["cx"] for t in texts]
    ys = [t["cy"] for t in texts]
    return [
        max(0, min(xs) - pad_x), max(0, min(ys) - pad_y),
        min(img_w, max(xs) + pad_x), min(img_h, max(ys) + pad_y),
    ]


def classify_regions(rects: List[Dict], ocr_lines: List[Dict], img_w: int, img_h: int) -> List[Dict]:
    """Classify page regions into table / drawing / title_block / notes.

    Args:
        rects: Rectangles as ``{"x", "y", "w", "h"}`` dicts.
        ocr_lines: OCR entries; each needs ``"cx"`` and ``"cy"`` (centre
            coordinates). ``"text"`` is used for the notes keyword search
            and ``"x1"``, when present, sizes the horizontal drawing padding.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        A list of region dicts with ``"type"``, ``"bbox"`` (``[x1, y1, x2,
        y2]``, clipped to the image) and ``"text_count"``; table regions
        additionally carry ``"density"``.
    """
    regions = []

    # 1. Tables: rectangles with a high density of OCR text inside.
    for r in rects:
        area = r["w"] * r["h"]
        if area <= 0:
            # Degenerate rectangle: density is undefined (would divide by
            # zero), and it cannot hold a table anyway.
            continue
        x_min, y_min = r["x"], r["y"]
        x_max, y_max = x_min + r["w"], y_min + r["h"]
        texts_in = [
            t for t in ocr_lines
            if x_min <= t["cx"] <= x_max and y_min <= t["cy"] <= y_max
        ]
        density = len(texts_in) / (area / 1000000)  # texts per megapixel

        if density > 20:  # high density = table
            regions.append({
                "type": "table",
                "bbox": [x_min, y_min, x_max, y_max],
                "density": density,
                "text_count": len(texts_in)
            })

    # 2. Drawing: simplified heuristic — texts in the upper 75% and the left
    #    60% of the page; table areas are not excluded.
    drawing_texts = [
        t for t in ocr_lines
        if t["cy"] < img_h * 0.75 and t["cx"] < img_w * 0.6
    ]
    if drawing_texts:
        # Horizontal padding = widest half-width among the texts (100 px
        # when no text provides "x1"); vertical padding = 100 px.
        half_widths = [t["cx"] - t["x1"] for t in drawing_texts if "x1" in t]
        pad_x = max(half_widths) if half_widths else 100
        regions.append({
            "type": "drawing",
            "bbox": _clamped_bbox(drawing_texts, pad_x, 100, img_w, img_h),
            "text_count": len(drawing_texts)
        })

    # 3. Title block: texts in the bottom 15% of the page.
    title_texts = [t for t in ocr_lines if t["cy"] > img_h * 0.85]
    if title_texts:
        regions.append({
            "type": "title_block",
            # Clipped to the image like the drawing bbox, so the padding
            # cannot push coordinates below 0 or past the page edge.
            "bbox": _clamped_bbox(title_texts, 50, 50, img_w, img_h),
            "text_count": len(title_texts)
        })

    # 4. Notes: texts containing one of the keywords.
    note_keywords = ["примечание", "общие указания", "границы", "размеры"]
    note_texts = [
        t for t in ocr_lines
        # A missing or None text is treated as empty rather than crashing.
        if any(kw in str(t.get("text") or "").lower() for kw in note_keywords)
    ]
    if note_texts:
        regions.append({
            "type": "notes",
            "bbox": _clamped_bbox(note_texts, 100, 100, img_w, img_h),
            "text_count": len(note_texts)
        })

    return regions
|
||
|
||
|
||
def detect_layout(png_path: Path, ocr_path: Path) -> Dict:
    """Run layout detection for one page image and its OCR result.

    Args:
        png_path: Path to the page image; it is read as grayscale.
        ocr_path: Path to a UTF-8 JSON file. The code reads
            ``pages[*].ocr_lines[*]`` and, for every line, ``bbox`` (either
            a list of ``[x, y]`` points or a flat ``[x1, y1, x2, y2]``) and
            ``text``.

    Returns:
        A dict with ``"image_size"`` (``[w, h]``), ``"regions"``,
        ``"rectangles"`` and ``"line_count"``.

    Raises:
        FileNotFoundError: If the image file does not exist.
        ValueError: If the image exists but cannot be decoded.
    """
    img = cv2.imread(str(png_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than on `img.shape` below.
        if not Path(png_path).exists():
            raise FileNotFoundError(f"Image file not found: {png_path}")
        raise ValueError(f"Cannot decode image: {png_path}")
    h, w = img.shape[:2]

    # Load the OCR result.
    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))

    # Collect every OCR line together with its coordinates.
    all_texts = []
    for page in ocr.get("pages", []):
        for line in page.get("ocr_lines", []):
            bbox = line.get("bbox", [])
            if not bbox:
                continue
            if isinstance(bbox[0], list):
                # Polygon form: a list of [x, y] points.
                xs = [p[0] for p in bbox]
                ys = [p[1] for p in bbox]
            elif len(bbox) >= 4:
                # Flat form: [x1, y1, x2, y2].
                xs = [bbox[0], bbox[2]]
                ys = [bbox[1], bbox[3]]
            else:
                # Malformed flat bbox: skip the line instead of crashing.
                continue
            all_texts.append({
                # Lines without a "text" field are kept with empty text.
                "text": line.get("text", ""),
                "cx": sum(xs)/len(xs),
                "cy": sum(ys)/len(ys),
                "x1": min(xs), "y1": min(ys),
                "x2": max(xs), "y2": max(ys),
                "bbox": bbox
            })

    # Find the lines.
    lines = find_all_lines(img)
    print(f"[INFO] Найдено {len(lines)} линий")

    # Find the rectangles.
    rects = find_rectangles(lines)
    print(f"[INFO] Найдено {len(rects)} прямоугольников")

    # Classify.
    regions = classify_regions(rects, all_texts, w, h)
    print(f"[INFO] Классифицировано {len(regions)} регионов")
    for r in regions:
        print(f"  {r['type']}: bbox={r['bbox']}, texts={r.get('text_count', 0)}")

    return {
        "image_size": [w, h],
        "regions": regions,
        "rectangles": rects,
        "line_count": len(lines)
    }
|
||
|
||
|
||
def visualize_layout(png_path: Path, layout: Dict, out_path: Path):
    """Draw the detected zones on top of the page image and save the result.

    Args:
        png_path: Path to the source page image.
        layout: Dict with a ``"regions"`` list; every region needs
            ``"type"`` and ``"bbox"`` (``[x1, y1, x2, y2]``).
        out_path: Where the annotated image is written.
    """
    # Imported here because the module-level import of ImageDraw only runs
    # under the `__main__` guard; without this, calling the function from an
    # importing module raises NameError.
    from PIL import ImageDraw

    # Convert to RGB so the colour-coded outlines stay distinguishable on
    # grayscale pages; the context manager closes the source file.
    with Image.open(png_path) as src:
        img = src.convert("RGB")
    draw = ImageDraw.Draw(img)
    colors = {
        "table": "blue",
        "drawing": "green",
        "title_block": "purple",
        "notes": "orange"
    }

    for region in layout["regions"]:
        x1, y1, x2, y2 = region["bbox"]
        # Region types without a configured colour are outlined in red.
        color = colors.get(region["type"], "red")
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
        draw.text((x1+5, y1+5), region["type"], fill=color)

    img.save(out_path)
    print(f"[OK] Layout visualization: {out_path}")
|
||
|
||
|
||
def main():
    """Command-line entry point: ``layout_detector.py <png> <ocr_json>``.

    Writes ``layout.json`` and ``<png stem>_layout.png`` next to the input
    image. Exits with status 1 when fewer than two arguments are given.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python layout_detector.py <png> <ocr_json>")
        sys.exit(1)

    image_path = Path(cli_args[0])
    ocr_json_path = Path(cli_args[1])

    # Both outputs are placed in the directory of the input image.
    target_dir = image_path.parent
    json_target = target_dir / "layout.json"
    preview_target = target_dir / f"{image_path.stem}_layout.png"

    result = detect_layout(image_path, ocr_json_path)

    with open(json_target, "w", encoding="utf-8") as fh:
        json.dump(result, fh, ensure_ascii=False, indent=2)
    print(f"[OK] Layout JSON: {json_target}")

    visualize_layout(image_path, result, preview_target)
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point. ImageDraw is bound as a module-level name here,
    # i.e. only when this file is executed directly rather than imported.
    from PIL import ImageDraw

    main()
|