- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank) - New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*) - New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives) - New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example - Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/ - Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile - Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim, RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged. Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'. Models (5.83 GB) live outside git; pulled via rsync from dev host.
67 lines
2.5 KiB
Python
67 lines
2.5 KiB
Python
"""Tests for stub_writer (binary file -> .md frontmatter stub)."""
|
|
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
from src.ingest.stub_writer import write_stub, _infer_kind
|
|
|
|
|
|
class StubWriterTestCase(unittest.TestCase):
    """Checks for ``_infer_kind`` and ``write_stub`` from ``src.ingest.stub_writer``.

    The ``_infer_kind`` tests pin the kind label expected for a handful of
    file names. The ``write_stub`` tests run inside a temporary directory and
    inspect the path and text of the stub that comes back.
    """

    def test_infer_kind_video(self):
        # The second name carries an upper-case extension and is expected to
        # be classified exactly like the lower-case one.
        for file_name in ("meeting.mp4", "recording.MKV"):
            self.assertEqual(_infer_kind(Path(file_name)), "video")

    def test_infer_kind_audio(self):
        detected = _infer_kind(Path("track.wav"))
        self.assertEqual(detected, "audio")

    def test_infer_kind_image(self):
        detected = _infer_kind(Path("photo.png"))
        self.assertEqual(detected, "image")

    def test_infer_kind_archive(self):
        detected = _infer_kind(Path("backup.zip"))
        self.assertEqual(detected, "archive")

    def test_infer_kind_other(self):
        # An extension outside the known groups is expected to yield "other".
        detected = _infer_kind(Path("unknown.xyz"))
        self.assertEqual(detected, "other")

    def test_write_stub_creates_md(self):
        # The payload is 16 bytes long, which is what "size: 16" refers to.
        expected_fragments = (
            "source: video.mp4",
            "kind: video",
            "project: 2026",
            "size: 16",
            "Бинарный файл",
        )
        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "video.mp4"
            source.write_bytes(b"fake-mp4-content")

            stub_path = write_stub(source, project="2026")

            # The stub is expected next to the source, named "<source name>.md".
            self.assertEqual(stub_path, source.with_suffix(".mp4.md"))
            self.assertTrue(stub_path.exists())

            stub_text = stub_path.read_text(encoding="utf-8")
            for fragment in expected_fragments:
                self.assertIn(fragment, stub_text)

    def test_write_stub_overwrites(self):
        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "video.mp4"
            source.write_bytes(b"a" * 10)

            first_stub = write_stub(source, project="p1")
            # Plant a marker in the existing stub; a second call must replace it.
            first_stub.write_text("OLD", encoding="utf-8")
            second_stub = write_stub(source, project="p1")

            self.assertEqual(first_stub, second_stub)
            rewritten = first_stub.read_text(encoding="utf-8")
            self.assertNotIn("OLD", rewritten)

    def test_write_stub_missing_file_raises(self):
        with tempfile.TemporaryDirectory() as workdir:
            # The path is built but never created on disk.
            absent = Path(workdir) / "nope.mp4"
            self.assertRaises(FileNotFoundError, write_stub, absent, project="x")

    def test_explicit_kind_overrides_inference(self):
        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "weird.bin"
            source.write_bytes(b"x")

            stub_path = write_stub(source, project="p", kind="custom")

            # The kind passed by the caller must appear in the stub verbatim.
            stub_text = stub_path.read_text(encoding="utf-8")
            self.assertIn("kind: custom", stub_text)
|
|
|
|
|
|
# Run the tests when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|