transcription/tests/test_native_engine_e2e.py
keboss-m eee8f4c8a4 Replace LightRAG with native Python RAG engine + add deploy tooling
- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README

Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim,
RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.

Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'.
Models (5.83 GB) live outside git; pulled via rsync from dev host.
2026-06-10 14:24:01 +03:00

123 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""End-to-end тест: ingest → search → context → chat-stream.
Не мокает движок — ставит заглушку только на OpenCode-клиент.
"""
import tempfile
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
from src.rag.qmd import indexer as qmd_indexer
from src.rag.qmd import query as qmd_query
# Markdown fixture: minutes of a (fictional) meeting, indexed by the e2e test.
# Built line-by-line; an empty entry yields a blank line, and the trailing
# empty entry yields the final newline.
SAMPLE_MEETING = "\n".join(
    (
        "# Совещание 2026-06-10",
        "",
        "Участники: Иванов, Петров, Сидорова.",
        "",
        "## Повестка",
        "1. Ход строительства 3-й очереди.",
        "2. Авторизация подрядчиков в системе.",
        "3. Сроки сдачи.",
        "",
        "## Решения",
        "- Завершить фундамент до 15 июля.",
        "- Выдать JWT-токены подрядчикам.",
        "- Срок сдачи — 30 ноября 2027.",
        "",
    )
)
class E2ETestCase(unittest.IsolatedAsyncioTestCase):
    """End-to-end check of the native RAG engine: index, search, chat stream.

    The engine itself is exercised for real against a temporary collection
    directory; only the ``AsyncOpenAI`` client used by the chat layer is
    replaced with a fake.
    """

    async def asyncSetUp(self):
        """Create a temporary collection directory for one test run."""
        from src.rag.qmd.collections import get_project_collection_dir

        self._tmp = tempfile.TemporaryDirectory()
        self.tmp = Path(self._tmp.name)
        self.coll_dir = get_project_collection_dir("merakom", "2026", self.tmp)
        self.coll_dir.mkdir(parents=True, exist_ok=True)
        # Engines opened by the test; closed again in asyncTearDown.
        self.engines = []

    async def asyncTearDown(self):
        """Close engines, drop the cached engine and remove the temp dir.

        Each cleanup stage runs even when an earlier one raises, so a failing
        ``close()`` does not skip the cache invalidation or leave the
        temporary directory behind.
        """
        import asyncio
        import contextlib

        try:
            for eng in self.engines:
                eng.close()
        finally:
            try:
                from src.rag.engine import invalidate_engine

                invalidate_engine(self.coll_dir)
                # Short pause before deleting the directory. Awaited rather
                # than time.sleep() so the event loop is not blocked.
                # NOTE(review): presumably gives the OS time to release file
                # handles held by the engine -- confirm.
                await asyncio.sleep(0.05)
            finally:
                # Best-effort removal: a leftover temp dir must not fail the
                # test. PermissionError is a subclass of OSError.
                with contextlib.suppress(OSError):
                    self._tmp.cleanup()

    async def test_e2e_meeting_index_search_chat(self):
        # Flow under test: ingest two files -> hybrid search -> streamed chat
        # answer built on top of the retrieved context.

        # 1. Ingest directly through Engine.index_file (into self.coll_dir).
        from src.rag.engine import get_or_create_engine

        eng = get_or_create_engine(self.coll_dir)
        # Register before warmup so teardown closes the engine even if
        # warmup or a later step fails.
        self.engines.append(eng)
        eng.warmup()

        body = self.tmp / "meeting.txt"
        summary = self.tmp / "meeting_summary.md"
        body.write_text(SAMPLE_MEETING, encoding="utf-8")
        summary.write_text("# Краткое\nОбсуждали строительство и JWT.", encoding="utf-8")

        r1 = eng.index_file(body)
        r2 = eng.index_file(summary)
        self.assertFalse(r1.skipped)
        self.assertFalse(r2.skipped)

        # 2. Search through the same engine.
        hits = eng.query("сдача объекта", limit=3, use_rerank=False)
        self.assertGreater(len(hits), 0)
        # At least one of the returned hits must come from meeting.txt
        # (not necessarily the top-ranked one).
        self.assertTrue(any("meeting.txt" in h.file_path for h in hits))

        # 3. Chat stream with the OpenCode (AsyncOpenAI) client replaced.
        def _chunk(text):
            # Mimics one streaming delta: chunk.choices[0].delta.content.
            return MagicMock(choices=[MagicMock(delta=MagicMock(content=text))])

        fake_chunks = [_chunk("Сдача "), _chunk("30 ноября "), _chunk("2027.")]

        class _FakeStream:
            """Minimal async iterator yielding the prepared chunks once."""

            def __init__(self):
                self._i = 0

            def __aiter__(self):
                return self

            async def __anext__(self):
                if self._i >= len(fake_chunks):
                    raise StopAsyncIteration
                item = fake_chunks[self._i]
                self._i += 1
                return item

        async def _fake_create(*args, **kwargs):
            # Streaming requests get a fresh iterator; anything else gets a
            # single non-streaming completion.
            if kwargs.get("stream"):
                return _FakeStream()
            return MagicMock(choices=[MagicMock(message=MagicMock(content="final"))])

        # NOTE(review): qmd_chat_stream receives only the org/project slugs;
        # how it resolves them to self.coll_dir (the temp dir) is not visible
        # here -- confirm it does not read a real collection directory.
        with patch("src.rag.qmd.query.AsyncOpenAI") as fake_cls:
            fake_instance = MagicMock()
            fake_instance.chat = MagicMock()
            fake_instance.chat.completions = MagicMock()
            fake_instance.chat.completions.create = AsyncMock(side_effect=_fake_create)
            fake_cls.return_value = fake_instance

            events = []
            async for ev in qmd_query.qmd_chat_stream(
                question="Когда сдача?",
                org_slug="merakom",
                history=[],
                project_slug="2026",
                api_key="test-key",
                use_rerank=False,
            ):
                events.append(ev)

        # Expected event order: context first, at least one chunk, done last.
        types = [e["type"] for e in events]
        self.assertEqual(types[0], "context")
        self.assertIn("chunk", types)
        self.assertEqual(types[-1], "done")
        self.assertEqual(events[-1]["answer"], "Сдача 30 ноября 2027.")
        # The context is non-empty and carries a source annotation.
        self.assertIn("[source:", events[0]["context"])