54 lines
1.5 KiB
Batchfile
54 lines
1.5 KiB
Batchfile
|
|
@echo off
REM run_pipeline.bat - full pipeline: OCR, then VLM, then RAG index.
REM Usage: run_pipeline.bat [PDF] [OUT]
REM   PDF - input PDF file; defaults to 123.pdf
REM   OUT - output directory; defaults to "output_" followed by PDF with
REM         every ".pdf" substring removed
REM Requirements: Python 3.11+, LM Studio running.

REM The tilde modifier strips surrounding quotes from the argument, so a
REM quoted path containing spaces does not break the emptiness test below
REM and is not double-quoted when it is later passed on as "PDF".
REM The quoted form of SET keeps trailing spaces and special characters
REM out of the value.
set "PDF=%~1"
if "%PDF%"=="" set "PDF=123.pdf"

set "OUT=%~2"
if "%OUT%"=="" set "OUT=output_%PDF:.pdf=%"

echo ==============================================
echo Pipeline: %PDF% → %OUT%
echo ==============================================
|
||
|
|
|
||
|
|
REM Locate a Python interpreter: prefer python3.11, fall back to python.
REM The result is stored in PYTHON; the script exits with code 1 if neither
REM is found on PATH.
REM
REM Percent-style errorlevel expansion must not be used inside a
REM parenthesised block: it is substituted once, when the whole block is
REM parsed, so a nested check would see the result of the first lookup
REM instead of the second. The conditional-execution operator and
REM "if not defined" are both evaluated at run time.
set "PYTHON="
where python3.11 >nul 2>nul && set "PYTHON=python3.11"
if not defined PYTHON (
    where python >nul 2>nul && set "PYTHON=python"
)
if not defined PYTHON (
    echo [ERR] Python не найден
    exit /b 1
)
|
||
|
|
|
||
|
|
REM 1. OCR: run process_any_pdf.py on the input PDF, writing into OUT.
echo.
echo [1/4] OCR: PDF → PNG + JSON
REM Abort on a non-zero exit status instead of carrying on with the
REM remaining stages, which operate on the same OUT directory.
%PYTHON% process_any_pdf.py "%PDF%" "%OUT%" || (
    echo [ERR] Шаг 1/4: OCR завершился с ошибкой
    exit /b 1
)
|
||
|
|
|
||
|
|
REM 2. VLM descriptions: run vlm_describer.py over OUT with the
REM qwen/qwen3-vl-4b model.
echo.
echo [2/4] VLM: PNG → описания (LM Studio)
REM Abort on a non-zero exit status so a failed stage is not silently
REM followed by indexing.
%PYTHON% vlm_describer.py "%OUT%" --model qwen/qwen3-vl-4b || (
    echo [ERR] Шаг 2/4: VLM завершился с ошибкой
    exit /b 1
)
|
||
|
|
|
||
|
|
REM 3. RAG index: run rag_indexer.py over OUT.
echo.
echo [3/4] RAG индекс: JSON + VLM → граф знаний
REM Delete any existing lightrag_cache directory before indexing.
if exist "%OUT%\lightrag_cache" rmdir /s /q "%OUT%\lightrag_cache"
REM Abort on a non-zero exit status; the old cache is already gone at this
REM point, so continuing would report success without a usable index.
%PYTHON% rag_indexer.py "%OUT%" --backend opencode --model nemotron-3-super-free --vlm-desc || (
    echo [ERR] Шаг 3/4: построение RAG индекса завершилось с ошибкой
    exit /b 1
)
|
||
|
|
|
||
|
|
REM 4. Test query: run rag_query.py against OUT with a fixed sample question.
echo.
echo [4/4] Тестовый запрос
REM Abort on a non-zero exit status so the closing banner and a zero exit
REM code are only produced when the query actually ran successfully.
%PYTHON% rag_query.py "%OUT%" "Какие оси есть в чертеже?" --backend opencode --mode hybrid || (
    echo [ERR] Шаг 4/4: тестовый запрос завершился с ошибкой
    exit /b 1
)

REM Closing banner with the location of the index cache directory.
echo.
echo ==============================================
echo Готово! Индекс в %OUT%\lightrag_cache
echo ==============================================
|