opencode/run_pipeline.bat
Кирилл Блинов c756a5766b Add RAG pipeline: LightRAG indexer, OpenCode API, VLM describer, and test tools
- Add rag_indexer.py: build LightRAG index from OCR with OpenCode API
- Add rag_query.py: query the knowledge graph
- Add vlm_describer.py: generate VLM descriptions via LM Studio
- Add test_model.py: quick check for LightRAG-compatible models
- Add run_pipeline.sh and run_pipeline.bat: full OCR → VLM → RAG pipeline
- Fix rapidocr import (rapidocr_onnxruntime)
- Fix process_any_pdf.py paths for cross-platform use
- Add .env.example, README_RAG.md, AGENTS.md
- Update .gitignore for outputs and secrets
2026-05-29 09:54:37 +03:00

54 lines
1.5 KiB
Batchfile

@echo off
REM run_pipeline.bat - full pipeline: OCR, VLM descriptions, RAG index, test query.
REM
REM Usage:  run_pipeline.bat [PDF] [OUT_DIR]
REM   PDF      input PDF file (default: 123.pdf)
REM   OUT_DIR  output directory (default: output_ followed by PDF with ".pdf" removed)
REM
REM Requirements: Python 3.11+, LM Studio running.
REM Exit code: 0 on success; 1 if no Python interpreter is found; otherwise the
REM exit code of the first pipeline step that failed.
REM
REM NOTE(review): the echo text and the test query contain non-ASCII characters;
REM whether they are displayed/passed correctly depends on this file's encoding
REM matching the console code page - confirm on the target machines.

REM Keep PDF, OUT and PYTHON from leaking into the caller's environment.
setlocal

REM %~1 / %~2 strip surrounding quotes, so a quoted argument (e.g. a path with
REM spaces) is neither double-quoted later nor breaks the emptiness checks.
set "PDF=%~1"
if not defined PDF set "PDF=123.pdf"
set "OUT=%~2"
if not defined OUT set "OUT=output_%PDF:.pdf=%"

echo ==============================================
echo Pipeline: %PDF% → %OUT%
echo ==============================================

REM Locate a Python interpreter: prefer python3.11, fall back to python.
REM "if errorlevel" is evaluated at run time. %errorlevel% inside a
REM parenthesized block is expanded when the whole block is parsed, so a nested
REM %errorlevel% test would still see the result of the first "where" call and
REM the fallback could never succeed.
set "PYTHON="
where python3.11 >nul 2>nul
if not errorlevel 1 set "PYTHON=python3.11"
if not defined PYTHON (
    where python >nul 2>nul
    if not errorlevel 1 set "PYTHON=python"
)
if not defined PYTHON (
    echo [ERR] Python не найден
    exit /b 1
)

REM Each step consumes the output directory of the previous one, so stop at the
REM first failure instead of running later steps on missing or partial data.
REM (%errorlevel% is safe on these lines: they are not inside a parenthesized block.)

REM 1. OCR
echo.
echo [1/4] OCR: PDF → PNG + JSON
%PYTHON% process_any_pdf.py "%PDF%" "%OUT%"
if %errorlevel% neq 0 exit /b %errorlevel%

REM 2. VLM descriptions
echo.
echo [2/4] VLM: PNG → описания (LM Studio)
%PYTHON% vlm_describer.py "%OUT%" --model qwen/qwen3-vl-4b
if %errorlevel% neq 0 exit /b %errorlevel%

REM 3. RAG index
echo.
echo [3/4] RAG индекс: JSON + VLM → граф знаний
REM Remove any existing lightrag_cache directory before re-indexing.
if exist "%OUT%\lightrag_cache" rmdir /s /q "%OUT%\lightrag_cache"
%PYTHON% rag_indexer.py "%OUT%" --backend opencode --model nemotron-3-super-free --vlm-desc
if %errorlevel% neq 0 exit /b %errorlevel%

REM 4. Test query
echo.
echo [4/4] Тестовый запрос
%PYTHON% rag_query.py "%OUT%" "Какие оси есть в чертеже?" --backend opencode --mode hybrid
if %errorlevel% neq 0 exit /b %errorlevel%

echo.
echo ==============================================
echo Готово! Индекс в %OUT%\lightrag_cache
echo ==============================================