- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README

Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim, RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.

Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'. Models (5.83 GB) live outside git; pulled via rsync from dev host.
49 lines
1.4 KiB
YAML
49 lines
1.4 KiB
YAML
# Docker Compose definition for the single `transcription` service.
# The service is built from Dockerfile.rag, runs uvicorn on port 8000 and
# persists its working directories in named volumes.
services:
  transcription:
    build:
      context: .
      dockerfile: Dockerfile.rag
    image: transcription-transcription:latest
    container_name: transcription_service
    ports:
      # Quoted so the mapping is always read as a string, never as a number.
      - "8000:8000"
    env_file:
      - .env
    # Values below use Compose interpolation: ${VAR:-default} falls back to
    # `default` when VAR is unset or empty. Keys listed here take precedence
    # over the same keys loaded through env_file.
    environment:
      - PYTHONUNBUFFERED=1
      - HF_TOKEN=${HF_TOKEN}
      - OPENCODE_API_KEY=${OPENCODE_API_KEY}
      - OPENCODE_URL=${OPENCODE_URL:-https://opencode.ai/zen/v1}
      - QMD_COLLECTION_ROOT=${QMD_COLLECTION_ROOT:-/app/processed}
      - RAG_EMBED_MODEL=${RAG_EMBED_MODEL:-sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2}
      # Cache locations; they match the ./models bind mounts declared below.
      - HF_HOME=/root/.cache/huggingface
      - NLTK_DATA=/root/nltk_data
      # NOTE(review): the two fallbacks below are placeholder credentials.
      # Set real values in the environment or .env before exposing the service.
      - JWT_SECRET=${JWT_SECRET:-change-me-in-production}
      - AUTH_ADMIN_PASSWORD=${AUTH_ADMIN_PASSWORD:-admin123}
    volumes:
      # Named volumes (declared at the bottom of this file).
      - uploads:/app/uploads
      - processed:/app/processed
      - tmp:/app/tmp
      # Host configuration and source trees, mounted read-only.
      - ./config.yaml:/app/config.yaml:ro
      - ./backend:/app/backend:ro
      - ./src:/app/src:ro
      - ./scripts:/app/scripts:ro
      # Model directories from the host; the Hugging Face cache is the only
      # host bind mount without :ro, so the container can write to it.
      - ./models/huggingface:/root/.cache/huggingface
      - ./models/nltk_data:/root/nltk_data:ro
      - data:/app/data
    restart: unless-stopped
    entrypoint: ["uvicorn"]
    command: ["backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
    healthcheck:
      # NOTE(review): requires `curl` inside the image; confirm that
      # Dockerfile.rag installs it.
      test: ["CMD", "curl", "-f", "http://localhost:8000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

# Named volumes referenced by the service above; no options are set on them.
volumes:
  uploads:
  processed:
  tmp:
  data: