235 lines
8.9 KiB
Python
235 lines
8.9 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
VLM-based quality-control checker for blueprints, using the Alibaba Cloud API.

Sends each page PNG to qwen-vl-plus (DashScope API) together with a prompt
that asks the model to find quality problems in the drawing.

Result: <output_folder>/vlm_qc_report.json — the same format as
dimension_qc_report.json, for compatibility with the viewer.

Usage:
    python vlm_qc_checker.py <output_folder> [--model MODEL]

Requires DASHSCOPE_API_KEY in .env or in the environment.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import json
|
|||
|
|
import base64
|
|||
|
|
import io
|
|||
|
|
import re
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import List, Dict, Tuple
|
|||
|
|
from PIL import Image
|
|||
|
|
from openai import OpenAI
|
|||
|
|
|
|||
|
|
# ------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------
# Cached API key; starts empty and is filled in by _load_api_key().
API_KEY = None
# Base URL passed to the OpenAI client (DashScope compatible-mode endpoint).
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
DEFAULT_MODEL = "qwen-vl-plus"  # vision model used to analyse the drawings
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _load_api_key():
    """Return the DashScope API key, caching it in the module-level ``API_KEY``.

    Lookup order:
      1. the value already cached in ``API_KEY``;
      2. a ``DASHSCOPE_API_KEY=...`` line in a ``.env`` file located next to
         this file, then in its parent directory;
      3. the ``DASHSCOPE_API_KEY`` environment variable.

    A key found in ``.env`` is also exported to ``os.environ``.

    Returns:
        The key string, or ``None`` when no source provides a value.
    """
    global API_KEY
    if API_KEY:
        return API_KEY

    var_name = "DASHSCOPE_API_KEY"
    here = Path(__file__).parent
    for env_path in (here / ".env", here.parent / ".env"):
        # is_file() (not exists()) so a directory named ".env" is ignored.
        if not env_path.is_file():
            continue
        # utf-8-sig drops a BOM; errors="replace" keeps an oddly encoded file
        # from aborting the lookup (the key itself is plain ASCII).
        content = env_path.read_text(encoding="utf-8-sig", errors="replace")
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            name, sep, value = line.partition("=")
            if not sep or name.strip() != var_name:
                continue
            value = value.strip()
            # Strip one pair of matching quotes: KEY="sk-..." / KEY='sk-...'.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            if not value:
                # An empty assignment must not shadow a real key that is
                # defined in another file or in the environment.
                continue
            API_KEY = value
            os.environ[var_name] = value
            return API_KEY

    API_KEY = os.environ.get(var_name)
    return API_KEY
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Prompt text sent to the model together with each page image (see
# analyze_page). It is written in Russian and asks the model, acting as a
# design engineer, to look for eight kinds of dimensioning/layout problems and
# to answer strictly with a JSON array of objects carrying the keys "type",
# "severity", "message" and "bbox" (or an empty array when nothing is found).
# The text is runtime data: changing it changes what the model is asked.
QC_PROMPT = (
    "Ты — опытный инженер-конструктор. Проанализируй этот чертёж и найди ошибки "
    "и проблемы в простановке размеров, расположении элементов и оформлении.\n\n"
    "Ищи такие проблемы:\n"
    "1. Пересечение размерных линий друг с другом\n"
    "2. Размеры, наложенные на текст или линии\n"
    "3. Неправильное расположение размеров (слишком близко к контуру, внутри объекта)\n"
    "4. Пропущенные размеры (есть линии, но нет чисел)\n"
    "5. Неправильные стрелки размеров\n"
    "6. Размеры вне зоны видимости (слишком далеко)\n"
    "7. Некорректные цепочки размеров (разрывы)\n"
    "8. Плохая читаемость размеров (маленький шрифт, плохой контраст)\n\n"
    "Ответь СТРОГО в формате JSON-массива (без markdown, без ```):\n"
    '[\n'
    ' {\n'
    ' "type": "DIMENSION_OVERLAP",\n'
    ' "severity": "warning",\n'
    ' "message": "Описание проблемы на русском",\n'
    ' "bbox": [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]\n'
    ' }\n'
    ']\n\n'
    "Если проблем нет — верни пустой массив [].\n"
    "severity: 'error' (критично), 'warning' (стоит исправить), 'info' (замечание).\n"
    "bbox — координаты проблемной зоны в пикселях (если можешь определить),"
    " иначе верни null."
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def resize_image(image_path: Path, max_size: int = 2048) -> Tuple[str, float, Tuple[int, int]]:
    """Encode an image as base64, downscaling it first when it is too large.

    An image whose longer side is at most ``max_size`` pixels is passed through
    untouched: the file bytes are base64-encoded as they are. A larger image is
    shrunk proportionally so that its longer side becomes ``max_size`` and is
    re-encoded as PNG, which keeps the request small (fewer tokens).

    Args:
        image_path: Path to the image file.
        max_size: Maximum allowed length of the longer side, in pixels.

    Returns:
        ``(base64_string, scale_factor, (orig_w, orig_h))``. ``scale_factor``
        is ``1.0`` for an untouched image and ``max_size / longer_side`` for a
        downscaled one.
    """
    # The context manager closes the underlying file handle; a bare
    # Image.open() would keep it open until the object is garbage collected.
    with Image.open(image_path) as img:
        orig_w, orig_h = img.size
        longest = max(orig_w, orig_h)

        if longest > max_size:
            scale = max_size / longest
            # Clamp to 1 px so an extreme aspect ratio never produces a
            # zero-sized target, which resize() cannot handle.
            new_w = max(1, int(orig_w * scale))
            new_h = max(1, int(orig_h * scale))

            buf = io.BytesIO()
            img.resize((new_w, new_h), Image.LANCZOS).save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
            return b64, scale, (orig_w, orig_h)

    # Small enough already: send the original file bytes unchanged.
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return b64, 1.0, (orig_w, orig_h)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def parse_vlm_response(text: str) -> list:
    """Extract the list of issue objects from a raw model reply.

    Accepted reply shapes:
      * a bare JSON array of issue objects;
      * a JSON object with an ``"issues"`` array;
      * either of the above wrapped in a markdown code fence;
      * a JSON array embedded in surrounding prose.

    Only ``dict`` items are returned, so callers can rely on ``issue.get``.

    Args:
        text: Raw reply text (``None`` is treated as an empty reply).

    Returns:
        A list of issue dicts; an empty list when nothing usable is found.
        A warning with the start of the raw text is printed when the reply
        contains no decodable JSON at all.
    """
    text = (text or "").strip()
    if text.startswith("```"):
        text = re.sub(r"^```[a-zA-Z0-9_-]*[ \t]*\r?\n", "", text)
        text = re.sub(r"\r?\n[ \t]*```$", "", text)
        text = text.strip()

    # 1) The whole reply is valid JSON: the common case, and the only way a
    #    {"issues": [...]} wrapper can be recognised reliably.
    try:
        return _coerce_issue_list(json.loads(text))
    except json.JSONDecodeError as e:
        parse_error = e

    # 2) The array is embedded in prose. Decode from every "[" and keep the
    #    first array that holds issue objects; this skips bracketed fragments
    #    such as "[1]" that a greedy regex match would have swallowed.
    decoder = json.JSONDecoder()
    saw_empty_array = False
    start = text.find("[")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, list):
            records = [item for item in candidate if isinstance(item, dict)]
            if records:
                return records
            if not candidate:
                saw_empty_array = True
        start = text.find("[", start + 1)

    if saw_empty_array:
        # The model explicitly answered "[]" somewhere in its prose.
        return []

    print(f"[WARN] Не удалось распарсить JSON: {parse_error}")
    print(f"[WARN] Raw text: {text[:500]}")
    return []


def _coerce_issue_list(data) -> list:
    """Return the dict items of a decoded payload (a list or {"issues": [...]})."""
    if isinstance(data, dict):
        data = data.get("issues")
    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict)]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def analyze_page(image_path: Path, model: str) -> list:
    """Send one page image to the vision model and return the issues it reports.

    The image is encoded by ``resize_image`` (downscaled to at most 2048 px on
    the longer side), sent together with ``QC_PROMPT`` as a single user
    message, and the reply is parsed with ``parse_vlm_response``. The raw reply
    is also written next to the image as ``<stem>_vlm_raw.txt`` for debugging.
    When the image was downscaled, bbox coordinates in the reply are mapped
    back to the original resolution.

    Args:
        image_path: Path to the page PNG.
        model: Model name passed to the chat-completions endpoint.

    Returns:
        The list of issues produced by ``parse_vlm_response``.

    Raises:
        RuntimeError: if no API key is available.
    """
    api_key = _load_api_key()
    if not api_key:
        raise RuntimeError("DASHSCOPE_API_KEY not found in .env or environment")

    client = OpenAI(api_key=api_key, base_url=BASE_URL)

    b64, scale, _orig_size = resize_image(image_path, max_size=2048)
    data_url = f"data:image/png;base64,{b64}"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": QC_PROMPT},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        temperature=0.2,
        max_tokens=4096,
    )
    # NOTE(review): the SDK types message.content as optional, so guard
    # against None instead of calling .strip() on it directly.
    raw = (response.choices[0].message.content or "").strip()

    # Keep the raw reply for debugging.
    debug_path = image_path.parent / f"{image_path.stem}_vlm_raw.txt"
    debug_path.write_text(raw, encoding="utf-8")

    issues = parse_vlm_response(raw)

    # Map bbox coordinates back to the original image resolution.
    if scale != 1.0:
        for issue in issues:
            if isinstance(issue, dict):
                _rescale_bbox(issue.get("bbox"), scale)

    return issues


def _rescale_bbox(bbox, scale: float) -> None:
    """Divide every numeric coordinate in ``bbox`` by ``scale``, in place.

    Handles both ``[[x, y], ...]`` point lists and flat ``[x1, y1, x2, y2]``
    boxes; anything that is not a number (None, strings, ...) is left alone.
    """
    if not isinstance(bbox, list):
        return
    for idx, item in enumerate(bbox):
        if isinstance(item, list):
            _rescale_bbox(item, scale)
        elif isinstance(item, (int, float)) and not isinstance(item, bool):
            bbox[idx] = round(item / scale)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def run_vlm_qc(folder: Path, model: str = DEFAULT_MODEL):
    """Run the VLM quality check on every ``page_*.png`` in ``folder``.

    Pages are processed in numeric order. Every issue returned for a page is
    tagged with its page number and ``"source": "vlm"`` and filed under
    ``errors`` / ``warnings`` / ``infos`` according to its severity (unknown
    severities count as warnings). The report is written to
    ``<folder>/vlm_qc_report.json`` and a summary is printed.

    A page whose analysis raises is reported on stdout and skipped; the
    remaining pages are still processed.
    NOTE(review): skipped pages are not recorded in the JSON report, so a
    failed page is indistinguishable from a clean one there.

    Args:
        folder: Directory containing the page images.
        model: Model name forwarded to ``analyze_page``.

    Exits the process with status 1 when the folder has no ``page_*.png``.
    """
    png_files = sorted(folder.glob("page_*.png"), key=_page_sort_key)
    if not png_files:
        print(f"[ERR] В папке {folder} не найдены page_*.png")
        sys.exit(1)

    out_path = folder / "vlm_qc_report.json"
    report = {"errors": [], "warnings": [], "infos": [], "source": "vlm"}

    print(f"[INFO] VLM QC: {len(png_files)} страниц")
    print(f"[INFO] API: DashScope ({BASE_URL})")
    print(f"[INFO] Модель: {model}\n")

    for i, png in enumerate(png_files, 1):
        print(f"[{i}/{len(png_files)}] {png.name} ...", end=" ", flush=True)
        try:
            # Parse the page number first so a badly named file fails before
            # the API request is made, not after it.
            page_num = int(png.stem.split("_")[1])
            issues = analyze_page(png, model)
        except Exception as e:
            # Per-page boundary: one failing page must not abort the run.
            print(f"ERR: {e}")
            continue

        kept = 0
        for issue in issues:
            if not isinstance(issue, dict):
                continue
            issue["page"] = page_num
            issue["source"] = "vlm"
            # Normalise case/whitespace so "ERROR" is not downgraded.
            sev = str(issue.get("severity", "warning")).strip().lower()
            if sev not in ("error", "warning", "info"):
                sev = "warning"
            # Keep the stored severity in line with the bucket it is filed in.
            issue["severity"] = sev
            report[f"{sev}s"].append(issue)
            kept += 1

        print(f"OK ({kept} issues)")

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    total = sum(len(report[k]) for k in ["errors", "warnings", "infos"])
    print(f"\n[OK] VLM QC сохранён: {out_path}")
    print(f" Всего замечаний: {total}")
    print(f" Ошибки: {len(report['errors'])}, Предупреждения: {len(report['warnings'])}, Инфо: {len(report['infos'])}")


def _page_sort_key(png: Path):
    """Sort key ordering pages numerically (page_2 before page_10)."""
    try:
        return (0, int(png.stem.split("_")[1]), png.name)
    except (IndexError, ValueError):
        # Names without a numeric part go last, in alphabetical order.
        return (1, 0, png.name)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point: read the arguments and run the QC on a folder."""
    from argparse import ArgumentParser

    cli = ArgumentParser(description="VLM QC для чертежей через qwen-vl API")
    cli.add_argument("folder", help="Папка с page_*.png")
    cli.add_argument(
        "--model",
        default=DEFAULT_MODEL,
        help="Имя модели (default: qwen-vl-plus)",
    )
    options = cli.parse_args()

    # Hand the target directory and the chosen model over to the runner.
    run_vlm_qc(Path(options.folder), options.model)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|