transcription/tests/test_rag_stream_ws.py
keboss-m eee8f4c8a4 Replace LightRAG with native Python RAG engine + add deploy tooling
- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README

Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim,
RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.

Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'.
Models (5.83 GB) live outside git; pulled via rsync from dev host.
2026-06-10 14:24:01 +03:00

60 lines
1.9 KiB
Python

"""Smoke-test WebSocket RAG streaming against running service."""
import asyncio
import json
import sys
import urllib.error
import urllib.request
import websockets
async def main() -> int:
    """Run a single RAG query over the WebSocket API and report the outcome.

    Logs in over HTTP to obtain an access token, opens the WebSocket with
    that token, sends one ``rag_query_global`` request and reads messages
    until a terminal ``rag_response`` or ``rag_error`` arrives (at most 200
    messages are read).

    Returns:
        0 when a ``rag_response`` message is received. 1 on any failure:
        login rejected, service unreachable, malformed login response,
        ``rag_error`` message, receive timeout, connection closed early, or
        no terminal message within the message limit.
    """
    login_body = json.dumps({"username": "admin", "password": "admin123"}).encode()
    login_req = urllib.request.Request(
        "http://127.0.0.1:8000/api/auth/login",
        data=login_body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    # NOTE: urlopen is a blocking call; nothing else is scheduled on the
    # event loop at this point, so it does not starve other tasks.
    try:
        with urllib.request.urlopen(login_req, timeout=10) as resp:
            token = json.loads(resp.read().decode())["access_token"]
    except urllib.error.HTTPError as e:
        # The server answered with an error status; show its response body.
        print(f"Login failed: {e.read().decode()}", file=sys.stderr)
        return 1
    except OSError as e:
        # URLError (connection refused, DNS failure) and socket timeouts are
        # OSError subclasses: the service is not reachable at all.
        print(f"Login failed: service unreachable: {e}", file=sys.stderr)
        return 1
    except (KeyError, TypeError, ValueError) as e:
        # Body was not valid JSON/UTF-8 or did not contain "access_token".
        print(f"Login failed: unexpected response: {e!r}", file=sys.stderr)
        return 1

    uri = f"ws://127.0.0.1:8000/ws?token={token}"
    types: list[str] = []
    chunks = 0
    async with websockets.connect(uri, open_timeout=10) as ws:
        await ws.send(json.dumps({
            "action": "rag_query_global",
            "question": "Кратко: что есть в базе знаний?",
            "history": [],
            "chat_mode": "hybrid",
            "retrieval_mode": "naive",
        }))
        # Cap the number of messages so a misbehaving stream cannot keep
        # the smoke test running forever.
        while len(types) < 200:
            try:
                raw = await asyncio.wait_for(ws.recv(), timeout=180)
            except asyncio.TimeoutError:
                print("Timed out waiting for a RAG message", file=sys.stderr)
                return 1
            except websockets.ConnectionClosed as e:
                print(
                    f"Connection closed before a terminal message: {e}",
                    file=sys.stderr,
                )
                return 1
            msg = json.loads(raw)
            msg_type = msg.get("type")
            types.append(msg_type)
            if msg_type == "rag_chunk":
                chunks += 1
            if msg_type in ("rag_response", "rag_error"):
                if msg_type == "rag_error":
                    print(f"RAG error: {msg.get('error')}", file=sys.stderr)
                    return 1
                # Tolerate an explicit null answer when measuring its length.
                answer = msg.get("answer") or ""
                print(f"OK: types={types[:5]}... chunks={chunks} answer_len={len(answer)}")
                return 0
    print("No terminal message received", file=sys.stderr)
    return 1
if __name__ == "__main__":
    # Use main()'s integer result as the process exit status.
    exit_status = asyncio.run(main())
    sys.exit(exit_status)