# opencode/layout_detector.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Layout Detector — разделение страницы чертежа на зоны.

Зоны:
- "drawing" — схемы, виды, разрезы (линии + текст, разрежено)
- "table" — таблицы (плотные линии в сетке)
- "title_block" — штамп (нижний правый угол или низ страницы)
- "notes" — примечания, текстовые блоки
- "legend" — легенда/условные обозначения

Алгоритм:
1. Находит все линии на странице
2. Находит прямоугольники = таблицы
3. Анализирует плотность OCR текста
4. Классифицирует регионы
"""

import sys
import json
from pathlib import Path
from typing import List, Dict, Tuple
import cv2
import numpy as np
from PIL import Image


def _collect_runs(mask: np.ndarray, min_length: int) -> List[Tuple[int, int, int]]:
    """Return ``(row, first, last)`` for each run of True values along rows of *mask*.

    ``last`` is inclusive. Only runs at least *min_length* pixels long are
    kept. Runs are ordered row by row and left to right within a row.
    """
    n_rows = mask.shape[0]
    # Pad every row with one False on each side so that runs touching the
    # image border still produce both a rising and a falling edge.
    border = np.zeros((n_rows, 1), dtype=np.int8)
    padded = np.hstack((border, mask.astype(np.int8), border))
    delta = np.diff(padded, axis=1)
    # np.nonzero scans in row-major order, and within a row starts and stops
    # alternate, so the k-th start always pairs with the k-th stop.
    run_rows, first = np.nonzero(delta == 1)
    _, stop = np.nonzero(delta == -1)  # exclusive end index
    keep = (stop - first) >= min_length
    # .tolist() yields plain Python ints, which keeps everything derived from
    # the segments JSON-serialisable.
    return list(zip(run_rows[keep].tolist(),
                    first[keep].tolist(),
                    (stop[keep] - 1).tolist()))


def find_all_lines(img_gray: np.ndarray, min_length: int = 40,
                   threshold: int = 180) -> List[Tuple]:
    """Find all straight horizontal and vertical pixel runs in a grayscale image.

    A pixel counts as "ink" when its value is ``<= threshold`` (the same
    pixels an inverse binary threshold at that level would select).

    Args:
        img_gray: 2-D grayscale image.
        min_length: minimum run length, in pixels, to be reported.
        threshold: darkest-to-lightest cut-off for ink pixels.

    Returns:
        A list of ``(kind, x1, y1, x2, y2)`` tuples with inclusive end
        coordinates, where ``kind`` is ``"h"`` or ``"v"``. Horizontal runs
        come first (top to bottom, left to right), then vertical runs
        (left to right, top to bottom).

    Raises:
        ValueError: if *img_gray* is not two-dimensional.
    """
    ink = np.asarray(img_gray) <= threshold
    if ink.ndim != 2:
        raise ValueError(f"expected a 2-D grayscale image, got shape {ink.shape}")

    lines = [("h", x_first, y, x_last, y)
             for y, x_first, x_last in _collect_runs(ink, min_length)]
    # Vertical runs are horizontal runs of the transposed mask.
    lines.extend(("v", x, y_first, x, y_last)
                 for x, y_first, y_last in _collect_runs(ink.T, min_length))
    return lines


def find_rectangles(lines: List[Tuple], min_size: int = 100) -> List[Dict]:
    """Find rectangles bounded by two horizontal and two vertical segments.

    Two horizontal segments on different rows form a candidate over their
    common x-interval. The candidate is accepted when a vertical segment at
    exactly the left x and another at exactly the right x each span the full
    distance from the top row to the bottom row.

    Args:
        lines: ``(kind, x1, y1, x2, y2)`` tuples, ``kind`` being ``"h"`` or
            ``"v"``, with inclusive end coordinates.
        min_size: minimum width of the common x-interval. Height is not
            constrained.

    Returns:
        A list of ``{"x", "y", "w", "h"}`` dicts. Rectangles lying strictly
        inside another found rectangle are dropped.
    """
    from collections import defaultdict

    # Horizontal segments grouped by row, vertical segments grouped by column.
    rows = defaultdict(list)
    cols = defaultdict(list)
    for seg in lines:
        if seg[0] == "h":
            rows[seg[2]].append((seg[1], seg[3]))
        elif seg[0] == "v":
            cols[seg[1]].append((seg[2], seg[4]))

    def has_side(x, y_top, y_bottom):
        # A real side must connect the two horizontal lines; a segment that
        # merely overlaps the y-range would create rectangles between
        # unrelated lines.
        return any(seg_top <= y_top and seg_bottom >= y_bottom
                   for seg_top, seg_bottom in cols.get(x, ()))

    rects = []
    y_vals = sorted(rows)
    for idx, y_top in enumerate(y_vals):
        for y_bottom in y_vals[idx + 1:]:
            for top_x1, top_x2 in rows[y_top]:
                for bot_x1, bot_x2 in rows[y_bottom]:
                    # Common x-interval of the two horizontal segments.
                    x_left = max(top_x1, bot_x1)
                    x_right = min(top_x2, bot_x2)
                    if x_right - x_left < min_size:
                        continue
                    if has_side(x_left, y_top, y_bottom) and has_side(x_right, y_top, y_bottom):
                        rects.append({
                            "x": x_left, "y": y_top,
                            "w": x_right - x_left, "h": y_bottom - y_top
                        })

    def strictly_inside(inner, outer):
        return (inner["x"] > outer["x"] and inner["y"] > outer["y"] and
                inner["x"] + inner["w"] < outer["x"] + outer["w"] and
                inner["y"] + inner["h"] < outer["y"] + outer["h"])

    # Keep only outer rectangles. A rectangle can never be strictly inside
    # itself, so no identity check is needed.
    return [r for r in rects
            if not any(strictly_inside(r, other) for other in rects)]


def _clamped_bbox(xs, ys, pad_x, pad_y, img_w, img_h):
    """Return the bbox of the points, grown by the paddings and clipped to the image."""
    return [
        max(0, min(xs) - pad_x),
        max(0, min(ys) - pad_y),
        min(img_w, max(xs) + pad_x),
        min(img_h, max(ys) + pad_y),
    ]


def classify_regions(rects: List[Dict], ocr_lines: List[Dict], img_w: int, img_h: int) -> List[Dict]:
    """Classify page regions from detected rectangles and OCR line centres.

    Args:
        rects: ``{"x", "y", "w", "h"}`` rectangles.
        ocr_lines: dicts with at least ``"cx"``, ``"cy"`` and ``"text"``;
            ``"x1"`` is used when present.
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        A list of region dicts with ``"type"``, ``"bbox"`` as
        ``[x1, y1, x2, y2]`` and ``"text_count"``; table regions also carry
        ``"density"``. Heuristic bboxes (drawing, title_block, notes) are
        clipped to the image bounds.
    """
    regions = []

    # 1. Tables: rectangles with a high density of OCR lines inside.
    for r in rects:
        area = r["w"] * r["h"]
        if area <= 0:
            # Degenerate rectangle: the density is undefined.
            continue
        x_max = r["x"] + r["w"]
        y_max = r["y"] + r["h"]
        text_count = sum(1 for t in ocr_lines
                         if r["x"] <= t["cx"] <= x_max
                         and r["y"] <= t["cy"] <= y_max)
        density = text_count / (area / 1000000)  # OCR lines per megapixel

        if density > 20:
            regions.append({
                "type": "table",
                "bbox": [r["x"], r["y"], x_max, y_max],
                "density": density,
                "text_count": text_count
            })

    # 2. Drawing: bounding box of the texts located in the top 75% and left
    #    60% of the page. The box is widened by the largest half-width among
    #    those texts (100 px when no "x1" is available) and by 100 px
    #    vertically.
    drawing_texts = [t for t in ocr_lines
                     if t["cy"] < img_h * 0.75 and t["cx"] < img_w * 0.6]
    if drawing_texts:
        half_widths = [t["cx"] - t["x1"] for t in drawing_texts if "x1" in t]
        pad_x = max(half_widths) if half_widths else 100
        regions.append({
            "type": "drawing",
            "bbox": _clamped_bbox([t["cx"] for t in drawing_texts],
                                  [t["cy"] for t in drawing_texts],
                                  pad_x, 100, img_w, img_h),
            "text_count": len(drawing_texts)
        })

    # 3. Title block: texts in the bottom 15% of the page.
    title_texts = [t for t in ocr_lines if t["cy"] > img_h * 0.85]
    if title_texts:
        regions.append({
            "type": "title_block",
            "bbox": _clamped_bbox([t["cx"] for t in title_texts],
                                  [t["cy"] for t in title_texts],
                                  50, 50, img_w, img_h),
            "text_count": len(title_texts)
        })

    # 4. Notes: texts that contain one of the keywords.
    note_keywords = ["примечание", "общие указания", "границы", "размеры"]
    note_texts = [t for t in ocr_lines
                  if any(kw in t["text"].lower() for kw in note_keywords)]
    if note_texts:
        regions.append({
            "type": "notes",
            "bbox": _clamped_bbox([t["cx"] for t in note_texts],
                                  [t["cy"] for t in note_texts],
                                  100, 100, img_w, img_h),
            "text_count": len(note_texts)
        })

    return regions


def detect_layout(png_path: Path, ocr_path: Path) -> Dict:
    """Run layout detection for one page image and its OCR result.

    Args:
        png_path: path to the page image.
        ocr_path: path to a UTF-8 JSON file with a top-level ``"pages"``
            list; each page may hold ``"ocr_lines"`` whose entries carry
            ``"text"`` and ``"bbox"``. A bbox is either a list of
            ``[x, y]`` points or a flat ``[x1, y1, x2, y2]`` list.

    Returns:
        A dict with ``"image_size"`` (``[w, h]``), ``"regions"``,
        ``"rectangles"`` and ``"line_count"``.

    Raises:
        FileNotFoundError: if *png_path* does not exist.
        ValueError: if the image exists but cannot be decoded.
    """
    img = cv2.imread(str(png_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        if not Path(png_path).exists():
            raise FileNotFoundError(f"Image not found: {png_path}")
        raise ValueError(f"Cannot decode image: {png_path}")
    h, w = img.shape[:2]

    # Load the OCR result.
    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))

    # Collect every OCR line together with its centre and extent.
    all_texts = []
    for page in ocr.get("pages", []):
        for line in page.get("ocr_lines", []):
            bbox = line.get("bbox", [])
            if not bbox:
                continue
            if isinstance(bbox[0], list):
                # Polygon form: list of [x, y] points.
                xs = [p[0] for p in bbox]
                ys = [p[1] for p in bbox]
            elif len(bbox) >= 4:
                # Flat form: [x1, y1, x2, y2].
                xs = [bbox[0], bbox[2]]
                ys = [bbox[1], bbox[3]]
            else:
                # Malformed bbox: skip instead of aborting the whole page.
                continue
            all_texts.append({
                # The classifier lowercases the text, so it must be a string.
                "text": line.get("text") or "",
                "cx": sum(xs)/len(xs),
                "cy": sum(ys)/len(ys),
                "x1": min(xs), "y1": min(ys),
                "x2": max(xs), "y2": max(ys),
                "bbox": bbox
            })

    # Find line segments.
    lines = find_all_lines(img)
    print(f"[INFO] Найдено {len(lines)} линий")

    # Find rectangles.
    rects = find_rectangles(lines)
    print(f"[INFO] Найдено {len(rects)} прямоугольников")

    # Classify regions.
    regions = classify_regions(rects, all_texts, w, h)
    print(f"[INFO] Классифицировано {len(regions)} регионов")
    for r in regions:
        print(f"  {r['type']}: bbox={r['bbox']}, texts={r.get('text_count', 0)}")

    return {
        "image_size": [w, h],
        "regions": regions,
        "rectangles": rects,
        "line_count": len(lines)
    }


def visualize_layout(png_path: Path, layout: Dict, out_path: Path):
    """Draw the detected regions on top of the page image and save the result.

    Args:
        png_path: path to the source page image.
        layout: dict with a ``"regions"`` list; each region has ``"type"``
            and ``"bbox"`` as ``[x1, y1, x2, y2]``.
        out_path: where the annotated image is written.
    """
    # Imported locally so the function also works when this module is
    # imported rather than run as a script.
    from PIL import ImageDraw

    # Convert to RGB so the outline colours stay visible on grayscale,
    # 1-bit or palette images; the context manager closes the source file.
    with Image.open(png_path) as src:
        img = src.convert("RGB")
    draw = ImageDraw.Draw(img)
    colors = {
        "table": "blue",
        "drawing": "green",
        "title_block": "purple",
        "notes": "orange"
    }

    for region in layout["regions"]:
        x1, y1, x2, y2 = region["bbox"]
        # Region types without an assigned colour are drawn in red.
        color = colors.get(region["type"], "red")
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
        draw.text((x1+5, y1+5), region["type"], fill=color)

    img.save(out_path)
    print(f"[OK] Layout visualization: {out_path}")


def main():
    """Command-line entry point: ``layout_detector.py <png> <ocr_json>``.

    Writes ``layout.json`` and ``<image stem>_layout.png`` into the
    directory of the input image. Prints a usage line and exits with
    status 1 when fewer than two arguments are given.
    """
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: python layout_detector.py <png> <ocr_json>")
        sys.exit(1)

    png, ocr = Path(argv[1]), Path(argv[2])
    out_dir = png.parent
    out_json = out_dir / "layout.json"
    out_png = out_dir / f"{png.stem}_layout.png"

    layout = detect_layout(png, ocr)

    with open(out_json, "w", encoding="utf-8") as fh:
        json.dump(layout, fh, ensure_ascii=False, indent=2)
    print(f"[OK] Layout JSON: {out_json}")

    visualize_layout(png, layout, out_png)


if __name__ == "__main__":
    # Binds ImageDraw as a module-level name only when the file is executed
    # as a script, then runs the command-line entry point.
    from PIL import ImageDraw
    main()