opencode/vlm_describer.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
VLM Describer — объективное извлечение структуры чертежа.

Отправляет PNG в qwen-vl-plus (DashScope API) с промптом на фактическое
описание содержимого. НЕ ищет ошибки, НЕ оценивает качество.

Результат: <output_folder>/vlm_extraction.json — структурированное описание
каждой страницы для использования в RAG и cross-verification.

Использование:
    python vlm_describer.py <output_folder> [--model MODEL]

Требует DASHSCOPE_API_KEY в .env или окружении.
"""

import os
import sys
import json
import base64
import io
import re
from pathlib import Path
from typing import List, Dict, Tuple
from PIL import Image
from openai import OpenAI

# ------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------
# Cached API key; stays None until _load_api_key() resolves it.
API_KEY = None
# Base URL handed to the OpenAI client (DashScope "compatible-mode" endpoint).
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
# Model name used when the caller / CLI does not pass one.
DEFAULT_MODEL = "qwen-vl-plus"


def _load_api_key():
    """Resolve the DashScope API key and cache it in the module-level ``API_KEY``.

    Lookup order:
      1. the already cached ``API_KEY`` (if truthy);
      2. a ``DASHSCOPE_API_KEY=...`` line in ``.env`` next to this file;
      3. the same line in ``.env`` one directory above;
      4. the ``DASHSCOPE_API_KEY`` environment variable.

    A key found in a ``.env`` file is also exported to ``os.environ``.

    Returns:
        The key string, or ``None`` when no source provides a non-empty key.
    """
    global API_KEY
    if API_KEY:
        return API_KEY

    script_dir = Path(__file__).parent
    for env_path in (script_dir / ".env", script_dir.parent / ".env"):
        if not env_path.is_file():
            continue
        # Decode explicitly: the platform default encoding may choke on
        # non-ASCII comments; "utf-8-sig" also tolerates a leading BOM.
        content = env_path.read_text(encoding="utf-8-sig", errors="replace")
        for line in content.splitlines():
            entry = line.strip()
            if entry.startswith("export "):
                entry = entry[len("export "):].lstrip()
            if not entry.startswith("DASHSCOPE_API_KEY="):
                continue
            value = entry.split("=", 1)[1].strip()
            # .env files commonly quote values; the quotes are not part of the key.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            if not value:
                # An empty placeholder must not clobber a real environment variable.
                continue
            API_KEY = value
            os.environ["DASHSCOPE_API_KEY"] = value
            return API_KEY

    API_KEY = os.environ.get("DASHSCOPE_API_KEY")
    return API_KEY


# Prompt sent together with every page image. It is written in Russian and
# asks the model for a purely factual description (no error hunting, no
# quality judgement), returned as a bare JSON object with the keys
# page_type, title, beams, positions, gosts and description.
# NOTE(review): the rules mention "размеры" (dimensions) but the JSON schema
# in the prompt has no dimensions field — confirm whether one was dropped.
EXTRACTION_PROMPT = (
    "Ты — система распознавания чертежей. Опиши объективно, что изображено на этой странице. "
    "НЕ ищи ошибки, НЕ оценивай качество. Просто перечисли факты.\n\n"
    "Ответь СТРОГО в формате JSON (без markdown):\n"
    "{\n"
    '  "page_type": "plan / section / elevation / specification / detail / general_view / table / unknown",\n'
    '  "title": "заголовок или null",\n'
    '  "beams": ["Балка Б-1"],\n'
    '  "positions": ["П-1"],\n'
    '  "gosts": ["ГОСТ ..."],\n'
    '  "description": "2-3 предложения о содержимом"\n'
    "}\n\n"
    "ПРАВИЛА:\n"
    "- Только реальные элементы с чертежа, не придумывай\n"
    "- Пустой массив [] если нет элементов данного типа\n"
    "- НЕ включай массы из таблиц в размеры\n"
    "- Описание — только факты, без оценок"
)


def resize_image(image_path: Path, max_size: int = 2048) -> Tuple[str, float, Tuple[int, int]]:
    """Return the image as a base64 string, downscaled to fit ``max_size``.

    Args:
        image_path: Path to the image file on disk.
        max_size: Upper bound, in pixels, for the longer side of the image.

    Returns:
        A tuple ``(b64, scale, (orig_w, orig_h))``:
        ``b64`` is the base64-encoded image data, ``scale`` is the factor
        applied to the original dimensions (``1.0`` when nothing was resized),
        and ``(orig_w, orig_h)`` is the original size in pixels.

    When the image already fits, the file bytes are encoded unchanged;
    otherwise the image is resized with LANCZOS resampling and re-encoded
    as PNG in memory.
    """
    # Context manager guarantees the underlying file handle is released.
    with Image.open(image_path) as img:
        orig_w, orig_h = img.size
        longest_side = max(orig_w, orig_h)

        if longest_side > max_size:
            scale = max_size / longest_side
            # Clamp to 1 px: truncation can yield 0 for extreme aspect
            # ratios, and resizing to a zero dimension raises.
            new_size = (max(1, int(orig_w * scale)), max(1, int(orig_h * scale)))
            buf = io.BytesIO()
            img.resize(new_size, Image.LANCZOS).save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
            return b64, scale, (orig_w, orig_h)

    # Small enough already: ship the original file bytes untouched.
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return b64, 1.0, (orig_w, orig_h)


def parse_json_response(text: str) -> Dict:
    """Extract the JSON object from a VLM reply.

    Surrounding markdown code fences and any prose before or after the
    object are ignored. The object is decoded starting at the first ``{``,
    so trailing text (even text containing braces) does not break parsing.

    Args:
        text: Raw model reply; ``None`` is treated as an empty string.

    Returns:
        The decoded JSON object. If no object can be decoded, a fallback
        dict is returned instead: ``page_type`` is ``"unknown"``, the list
        fields are empty, ``description`` holds the first 500 characters of
        the reply and ``parse_error`` holds the reason.
    """
    text = (text or "").strip()
    if text.startswith("```"):
        text = re.sub(r"^```[a-zA-Z]*\n?", "", text)
        text = re.sub(r"\n?```$", "", text)
        text = text.strip()

    start = text.find("{")
    if start == -1:
        # A bare list/number/string is not a usable page description.
        error = "no JSON object found in response"
    else:
        try:
            # raw_decode stops at the end of the first complete value, so
            # whatever follows the object is simply left unparsed.
            parsed, _ = json.JSONDecoder().raw_decode(text[start:])
            return parsed
        except json.JSONDecodeError as e:
            error = str(e)

    print(f"[WARN] Не удалось распарсить JSON: {error}")
    print(f"[WARN] Raw preview: {text[:500]}")
    return {
        "page_type": "unknown",
        "title": None,
        "elements": [],
        "beams": [],
        "positions": [],
        "dimensions": [],
        "gosts": [],
        "tables": [],
        "description": text[:500] if text else "",
        "parse_error": error,
    }


def describe_page(image_path: Path, model: str) -> Dict:
    """Send one page image to the vision model and return its parsed description.

    Args:
        image_path: Path to the page PNG.
        model: Model name passed to the chat-completions endpoint.

    Returns:
        The dict produced by ``parse_json_response`` with an extra ``_meta``
        entry holding the image file name, its original pixel size and the
        scale factor applied before upload.

    Raises:
        RuntimeError: If no API key is available, or the model reply carries
            no text content.

    Side effect: the raw reply is written next to the image as
    ``<stem>_vlm_raw.txt`` (best effort).
    """
    api_key = _load_api_key()
    if not api_key:
        raise RuntimeError("DASHSCOPE_API_KEY not found in .env or environment")

    client = OpenAI(api_key=api_key, base_url=BASE_URL)

    b64, scale, (orig_w, orig_h) = resize_image(image_path, max_size=2048)
    data_url = f"data:image/png;base64,{b64}"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": EXTRACTION_PROMPT},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        temperature=0.1,  # low temperature — fewer hallucinations
        max_tokens=8192,
    )

    # The reply may have no choices or a null content; fail with a clear
    # message instead of an AttributeError/IndexError.
    choice = response.choices[0] if response.choices else None
    content = choice.message.content if choice is not None else None
    if content is None:
        reason = getattr(choice, "finish_reason", None)
        raise RuntimeError(f"Model returned no text content (finish_reason={reason})")
    raw = content.strip()

    # Keep the raw reply for debugging; a failed debug write must not
    # throw away an otherwise good response.
    debug_path = image_path.parent / f"{image_path.stem}_vlm_raw.txt"
    try:
        debug_path.write_text(raw, encoding="utf-8")
    except OSError as e:
        print(f"[WARN] Could not write {debug_path}: {e}")

    result = parse_json_response(raw)

    result["_meta"] = {
        "image": image_path.name,
        "original_size": [orig_w, orig_h],
        "scale": scale,
    }

    return result


def _count_elements(entry: Dict) -> int:
    """Count the items in the list-valued beams/positions/gosts fields of one page entry."""
    return sum(
        len(entry[key])
        for key in ("beams", "positions", "gosts")
        if isinstance(entry.get(key), list)
    )


def run_vlm_describer(folder: Path, model: str = DEFAULT_MODEL):
    """Describe every ``page_*.png`` in ``folder`` and save the results.

    Pages are processed in sorted file-name order. A page whose processing
    raises is recorded with an ``error`` field and empty lists, and the run
    continues with the next page. All entries are written to
    ``<folder>/vlm_extraction.json``, keyed by image file name.

    Args:
        folder: Directory containing the ``page_*.png`` files.
        model: Model name forwarded to ``describe_page``.

    Exits the process with status 1 when the folder has no matching files.
    """
    png_files = sorted(folder.glob("page_*.png"))
    if not png_files:
        print(f"[ERR] В папке {folder} не найдены page_*.png")
        sys.exit(1)

    out_path = folder / "vlm_extraction.json"
    extractions = {}

    print(f"[INFO] VLM Describer: {len(png_files)} страниц")
    print(f"[INFO] API: DashScope ({BASE_URL})")
    print(f"[INFO] Модель: {model}\n")

    for i, png in enumerate(png_files, 1):
        print(f"[{i}/{len(png_files)}] {png.name} ...", end=" ", flush=True)
        try:
            data = describe_page(png, model)
        except Exception as e:
            print(f"ERR: {e}")
            extractions[png.name] = {
                "page_type": "unknown",
                "error": str(e),
                "elements": [],
                "beams": [],
                "positions": [],
                "dimensions": [],
                "gosts": [],
                "tables": [],
                "description": ""
            }
        else:
            # Counting happens outside the try: a null or non-list field in
            # the model output must not turn a good extraction into an error.
            extractions[png.name] = data
            print(f"OK ({_count_elements(data)} элементов)")

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(extractions, f, ensure_ascii=False, indent=2)

    total_elems = sum(_count_elements(v) for v in extractions.values())
    print(f"\n[OK] VLM extraction сохранён: {out_path}")
    print(f"     Страниц: {len(png_files)}, Всего элементов: {total_elems}")


def main():
    """Command-line entry point: parse the arguments and run the describer."""
    from argparse import ArgumentParser

    cli = ArgumentParser(description="VLM Describer для чертежей")
    cli.add_argument("folder", help="Папка с page_*.png")
    cli.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help="Имя модели (default: qwen-vl-plus)",
    )
    options = cli.parse_args()

    run_vlm_describer(Path(options.folder), options.model)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								"""
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								VLM Describer — объективное извлечение структуры чертежа.
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								Отправляет PNG в qwen-vl-plus (DashScope API) с промптом на фактическое
 								описание содержимого. НЕ ищет ошибки, НЕ оценивает качество.
 								Результат: <output_folder>/vlm_extraction.json — структурированное описание
 								каждой страницы для использования в RAG и cross-verification.
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
 								Использование:
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    python vlm_describer.py <output_folder> [--model MODEL]
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								Требует DASHSCOPE_API_KEY в .env или окружении.
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								"""
 								import os
 								import sys
 								import json
 								import base64
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								import io
 								import re
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								from pathlib import Path
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								from typing import List, Dict, Tuple
 								from PIL import Image
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								from openai import OpenAI
 								# ------------------------------------------------------------------
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								# Конфигурация
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								# ------------------------------------------------------------------
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								API_KEY = None
 								BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
 								DEFAULT_MODEL = "qwen-vl-plus"
 								def _load_api_key():
 								    global API_KEY
 								    if API_KEY:
 								        return API_KEY
 								    env_candidates = [
 								        Path(__file__).parent / ".env",
 								        Path(__file__).parent.parent / ".env",
 								    ]
 								    for env_path in env_candidates:
 								        if env_path.exists():
 								            for line in env_path.read_text().splitlines():
 								                if line.startswith("DASHSCOPE_API_KEY="):
 								                    API_KEY = line.split("=", 1)[1].strip()
 								                    os.environ["DASHSCOPE_API_KEY"] = API_KEY
 								                    return API_KEY
 								    API_KEY = os.environ.get("DASHSCOPE_API_KEY")
 								    return API_KEY
 								EXTRACTION_PROMPT = (
 								    "Ты — система распознавания чертежей. Опиши объективно, что изображено на этой странице. "
 								    "НЕ ищи ошибки, НЕ оценивай качество. Просто перечисли факты.\n\n"
 								    "Ответь СТРОГО в формате JSON (без markdown):\n"
 								    "{\n"
 								    '  "page_type": "plan / section / elevation / specification / detail / general_view / table / unknown",\n'
 								    '  "title": "заголовок или null",\n'
 								    '  "beams": ["Балка Б-1"],\n'
 								    '  "positions": ["П-1"],\n'
 								    '  "gosts": ["ГОСТ ..."],\n'
 								    '  "description": "2-3 предложения о содержимом"\n'
 								    "}\n\n"
 								    "ПРАВИЛА:\n"
 								    "- Только реальные элементы с чертежа, не придумывай\n"
 								    "- Пустой массив [] если нет элементов данного типа\n"
 								    "- НЕ включай массы из таблиц в размеры\n"
 								    "- Описание — только факты, без оценок"
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								)
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								def resize_image(image_path: Path, max_size: int = 2048) -> Tuple[str, float, Tuple[int, int]]:
 								    img = Image.open(image_path)
 								    orig_w, orig_h = img.size
 								    if max(orig_w, orig_h) <= max_size:
 								        with open(image_path, "rb") as f:
 								            b64 = base64.b64encode(f.read()).decode("utf-8")
 								        return b64, 1.0, (orig_w, orig_h)
 								    scale = max_size / max(orig_w, orig_h)
 								    new_w = int(orig_w * scale)
 								    new_h = int(orig_h * scale)
 								    img_resized = img.resize((new_w, new_h), Image.LANCZOS)
 								    buf = io.BytesIO()
 								    img_resized.save(buf, format="PNG")
 								    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
 								    return b64, scale, (orig_w, orig_h)
 								def parse_json_response(text: str) -> Dict:
 								    """Парсит JSON из ответа VLM."""
 								    text = text.strip()
 								    if text.startswith("```"):
 								        text = re.sub(r"^```[a-zA-Z]*\n?", "", text)
 								        text = re.sub(r"\n?```$", "", text)
 								        text = text.strip()
 								    json_match = re.search(r'\{[\s\S]*\}', text)
 								    if json_match:
 								        text = json_match.group(0)
 								    try:
 								        return json.loads(text)
 								    except json.JSONDecodeError as e:
 								        print(f"[WARN] Не удалось распарсить JSON: {e}")
 								        print(f"[WARN] Raw preview: {text[:500]}")
 								        return {
 								            "page_type": "unknown",
 								            "title": None,
 								            "elements": [],
 								            "beams": [],
 								            "positions": [],
 								            "dimensions": [],
 								            "gosts": [],
 								            "tables": [],
 								            "description": text[:500] if text else "",
 								            "parse_error": str(e)
 								        }
 								def describe_page(image_path: Path, model: str) -> Dict:
 								    """Отправляет PNG в qwen-vl API, получает структурированное описание."""
 								    api_key = _load_api_key()
 								    if not api_key:
 								        raise RuntimeError("DASHSCOPE_API_KEY not found in .env or environment")
 								    client = OpenAI(api_key=api_key, base_url=BASE_URL)
 								    b64, scale, (orig_w, orig_h) = resize_image(image_path, max_size=2048)
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    data_url = f"data:image/png;base64,{b64}"
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    response = client.chat.completions.create(
 								        model=model,
 								        messages=[
 								            {
 								                "role": "user",
 								                "content": [
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								                    {"type": "text", "text": EXTRACTION_PROMPT},
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								                    {"type": "image_url", "image_url": {"url": data_url}},
 								                ],
 								            }
 								        ],
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								        temperature=0.1,  # низкая температура — меньше галлюцинаций
 								        max_tokens=8192,
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    )
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    raw = response.choices[0].message.content.strip()
 								    # Сохранить raw для отладки
 								    debug_path = image_path.parent / f"{image_path.stem}_vlm_raw.txt"
 								    debug_path.write_text(raw, encoding="utf-8")
 								    result = parse_json_response(raw)
 								    result["_meta"] = {
 								        "image": image_path.name,
 								        "original_size": [orig_w, orig_h],
 								        "scale": scale,
 								    }
 								    return result
 								def run_vlm_describer(folder: Path, model: str = DEFAULT_MODEL):
 								    """Запускает VLM Describer для всех PNG в папке."""
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    png_files = sorted(folder.glob("page_*.png"))
 								    if not png_files:
 								        print(f"[ERR] В папке {folder} не найдены page_*.png")
 								        sys.exit(1)
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    out_path = folder / "vlm_extraction.json"
 								    extractions = {}
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    print(f"[INFO] VLM Describer: {len(png_files)} страниц")
 								    print(f"[INFO] API: DashScope ({BASE_URL})")
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    print(f"[INFO] Модель: {model}\n")
 								    for i, png in enumerate(png_files, 1):
 								        print(f"[{i}/{len(png_files)}] {png.name} ...", end=" ", flush=True)
 								        try:
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								            data = describe_page(png, model)
 								            extractions[png.name] = data
 								            elem_count = len(data.get("beams", [])) + len(data.get("positions", [])) + len(data.get("gosts", []))
 								            print(f"OK ({elem_count} элементов)")
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								        except Exception as e:
 								            print(f"ERR: {e}")
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								            extractions[png.name] = {
 								                "page_type": "unknown",
 								                "error": str(e),
 								                "elements": [],
 								                "beams": [],
 								                "positions": [],
 								                "dimensions": [],
 								                "gosts": [],
 								                "tables": [],
 								                "description": ""
 								            }
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
 								    with open(out_path, "w", encoding="utf-8") as f:
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								        json.dump(extractions, f, ensure_ascii=False, indent=2)
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    total_elems = sum(
 								        len(v.get("beams", [])) + len(v.get("positions", [])) + len(v.get("gosts", []))
 								        for v in extractions.values()
 								    )
 								    print(f"\n[OK] VLM extraction сохранён: {out_path}")
 								    print(f"     Страниц: {len(png_files)}, Всего элементов: {total_elems}")
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
 								def main():
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    import argparse
 								    parser = argparse.ArgumentParser(description="VLM Describer для чертежей")
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    parser.add_argument("folder", help="Папка с page_*.png")
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    parser.add_argument("--model", default=DEFAULT_MODEL, help="Имя модели (default: qwen-vl-plus)")
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
+								    args = parser.parse_args()
 								    folder = Path(args.folder)
-												Add VLM tools: Describer, QC checker, and GOST validator

- vlm_describer.py: objective extraction (beams, positions, GOSTs, dimensions) via qwen-vl-plus API. No error detection — only factual observation.
- vlm_qc_checker.py: VLM-based QC (deprecated in favor of rules-only QC)
- gost_dimension_validator.py: validate GOST references and dimension chains against known standards

											
										
										
											2026-06-01 09:29:58 +00:00
+								    run_vlm_describer(folder, args.model)
-												Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools

- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets

											
										
										
											2026-05-29 06:54:37 +00:00
 								if __name__ == "__main__":
 								    main()