opencode/run_pipeline.sh

39 lines
1.1 KiB
Bash
Raw Permalink Normal View History

#!/bin/bash
# run_pipeline.sh — full pipeline: OCR → VLM → RAG index
#
# Usage:
#   run_pipeline.sh [PDF] [OUT]
#
# Arguments:
#   PDF  input PDF file              (default: 123.pdf)
#   OUT  output / working directory  (default: output_123)
#
# Environment:
#   PYTHON  Python interpreter used for every stage
#           (default: /opt/homebrew/bin/python3.11)
#
# The helper scripts (process_any_pdf.py, vlm_describer.py, rag_indexer.py,
# rag_query.py) are referenced by bare name, so they are looked up relative to
# the current working directory.
#
# Exits non-zero as soon as any stage fails.
set -euo pipefail

PDF="${1:-123.pdf}"
OUT="${2:-output_123}"
# Allow the interpreter to be overridden from the environment; the default
# keeps the previously hard-coded Homebrew location.
PYTHON="${PYTHON:-/opt/homebrew/bin/python3.11}"

# Fail early with a clear message instead of a bare "No such file" from step 1.
if ! command -v -- "$PYTHON" >/dev/null 2>&1; then
  echo "run_pipeline.sh: Python interpreter not found: $PYTHON" >&2
  exit 1
fi

echo "=============================================="
echo " Pipeline: $PDF → $OUT"
echo "=============================================="

# 1. OCR
echo ""
echo "[1/4] OCR: PDF → PNG + JSON"
"$PYTHON" process_any_pdf.py "$PDF" "$OUT"

# 2. VLM descriptions
echo ""
echo "[2/4] VLM: PNG → описания (LM Studio)"
"$PYTHON" vlm_describer.py "$OUT" --model qwen/qwen3-vl-4b

# 3. RAG index
echo ""
echo "[3/4] RAG индекс: JSON + VLM → граф знаний"
# Remove any cache directory left by a previous run before indexing
# (presumably to force a clean rebuild — confirm against rag_indexer.py).
# ${OUT:?} aborts rather than expanding to "/lightrag_cache" if OUT were empty.
rm -rf -- "${OUT:?}/lightrag_cache"
"$PYTHON" rag_indexer.py "$OUT" --backend opencode --model nemotron-3-super-free --vlm-desc

# 4. Test query
echo ""
echo "[4/4] Тестовый запрос"
"$PYTHON" rag_query.py "$OUT" "Какие оси есть в чертеже?" --backend opencode --mode hybrid

echo ""
echo "=============================================="
echo " Готово! Индекс в $OUT/lightrag_cache"
echo "=============================================="