2026-06-01 15:54:25 +00:00
|
|
|
"""Org-scoped filesystem paths."""
|
|
|
|
|
|
|
|
|
|
import json
|
Replace LightRAG with native Python RAG engine + add deploy tooling
- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README
Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim,
RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.
Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'.
Models (5.83 GB) live outside git; pulled via rsync from dev host.
2026-06-10 11:24:01 +00:00
|
|
|
import os
|
2026-06-01 15:54:25 +00:00
|
|
|
from datetime import datetime
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
# Relative root for general application data (resolved against the current
# working directory). None of the helpers visible in this section reference it.
DATA_ROOT = Path("data")
|
|
|
|
|
# Relative root under which org_upload_dir() creates the per-organisation,
# per-user upload folders.
UPLOAD_ROOT = Path("uploads")
|
|
|
|
|
# Relative root for per-organisation processed artefacts; the meetings,
# documents, legacy RAG cache and (by default) qmd helpers all build on it.
PROCESSED_ROOT = Path("processed")
|
|
|
|
|
# Directory name of the legacy LightRAG cache, used by org_rag_index_dir().
RAG_CACHE_DIRNAME = "lightrag_caches"
|
Replace LightRAG with native Python RAG engine + add deploy tooling
- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README
Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim,
RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.
Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'.
Models (5.83 GB) live outside git; pulled via rsync from dev host.
2026-06-10 11:24:01 +00:00
|
|
|
# Directory name for an organisation's qmd collections, used by org_qmd_root().
QMD_COLLECTIONS_DIRNAME = "qmd_collections"
|
2026-06-01 15:54:25 +00:00
|
|
|
# Directory name for an organisation's meetings, used by org_meetings_dir().
MEETINGS_DIRNAME = "meetings"
|
2026-06-01 16:16:23 +00:00
|
|
|
# Directory name for an organisation's documents, used by org_documents_dir().
DOCUMENTS_DIRNAME = "documents"
|
2026-06-01 15:54:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def org_upload_dir(org_slug: str, user_id: int) -> Path:
    """Return the upload directory of one user within an organisation.

    The directory is ``UPLOAD_ROOT/<org_slug>/<user_id>``. It is created,
    together with any missing parents, if it does not exist yet.
    """
    upload_dir = UPLOAD_ROOT.joinpath(org_slug, str(user_id))
    upload_dir.mkdir(parents=True, exist_ok=True)
    return upload_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def org_meetings_dir(org_slug: str) -> Path:
    """Return the meetings directory of an organisation.

    The directory is ``PROCESSED_ROOT/<org_slug>/MEETINGS_DIRNAME``. It is
    created, together with any missing parents, if it does not exist yet.
    """
    meetings_dir = PROCESSED_ROOT.joinpath(org_slug, MEETINGS_DIRNAME)
    meetings_dir.mkdir(parents=True, exist_ok=True)
    return meetings_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def org_rag_index_dir(org_slug: str) -> Path:
    """Legacy: return the organisation's LightRAG cache directory.

    Deprecated and kept only for migration. The directory is
    ``PROCESSED_ROOT/<org_slug>/RAG_CACHE_DIRNAME``; it is created, together
    with any missing parents, if it does not exist yet.
    """
    cache_dir = PROCESSED_ROOT.joinpath(org_slug, RAG_CACHE_DIRNAME)
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
|
|
|
|
|
|
|
|
|
|
|
Replace LightRAG with native Python RAG engine + add deploy tooling
- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank)
- New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*)
- New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives)
- New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example
- Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/
- Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile
- Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README
Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim,
RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged.
Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'.
Models (5.83 GB) live outside git; pulled via rsync from dev host.
2026-06-10 11:24:01 +00:00
|
|
|
def org_qmd_root(org_slug: str) -> Path:
    """Return the root directory of an organisation's qmd collections.

    The directory is ``<base>/<org_slug>/QMD_COLLECTIONS_DIRNAME``, where
    ``<base>`` is the value of the ``QMD_COLLECTION_ROOT`` environment
    variable when it is set to a non-empty string, and ``PROCESSED_ROOT``
    otherwise. The directory is created, together with any missing parents,
    if it does not exist yet.
    """
    override = os.environ.get("QMD_COLLECTION_ROOT")
    if override:
        base_dir = Path(override)
    else:
        # Unset or empty variable: fall back to the default processed root.
        base_dir = PROCESSED_ROOT
    qmd_dir = base_dir.joinpath(org_slug, QMD_COLLECTIONS_DIRNAME)
    qmd_dir.mkdir(parents=True, exist_ok=True)
    return qmd_dir
|
|
|
|
|
|
|
|
|
|
|
2026-06-01 16:16:23 +00:00
|
|
|
def org_documents_dir(org_slug: str) -> Path:
    """Return the documents directory of an organisation.

    The directory is ``PROCESSED_ROOT/<org_slug>/DOCUMENTS_DIRNAME``. It is
    created, together with any missing parents, if it does not exist yet.
    """
    documents_dir = PROCESSED_ROOT.joinpath(org_slug, DOCUMENTS_DIRNAME)
    documents_dir.mkdir(parents=True, exist_ok=True)
    return documents_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_document_path(org_slug: str, rel_path: str) -> Path:
    """Resolve ``rel_path`` under the organisation's documents directory.

    Args:
        org_slug: Organisation whose documents directory is the base.
        rel_path: Path to resolve relative to that directory.

    Returns:
        The fully resolved path (symlinks and ``..`` segments collapsed).

    Raises:
        ValueError: If the resolved path lies outside the documents directory.
    """
    base = org_documents_dir(org_slug).resolve()
    full = (base / rel_path).resolve()
    # Compare whole path components rather than string prefixes: a prefix
    # test would accept a sibling directory whose name merely starts with the
    # base name (e.g. "../documents_backup/x" against ".../documents").
    try:
        full.relative_to(base)
    except ValueError:
        raise ValueError("Invalid path") from None
    return full
|
|
|
|
|
|
|
|
|
|
|
2026-06-01 15:54:25 +00:00
|
|
|
def resolve_meeting_path(org_slug: str, rel_path: str) -> Path:
    """Resolve ``rel_path`` under the organisation's meetings directory.

    Args:
        org_slug: Organisation whose meetings directory is the base.
        rel_path: Path to resolve relative to that directory.

    Returns:
        The fully resolved path (symlinks and ``..`` segments collapsed).

    Raises:
        ValueError: If the resolved path lies outside the meetings directory.
    """
    base = org_meetings_dir(org_slug).resolve()
    full = (base / rel_path).resolve()
    # Compare whole path components rather than string prefixes: a prefix
    # test would accept a sibling directory whose name merely starts with the
    # base name (e.g. "../meetings_old/x" against ".../meetings").
    try:
        full.relative_to(base)
    except ValueError:
        raise ValueError("Invalid path") from None
    return full
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_folder_project_meta(folder_path: Path, project_slug: str) -> None:
    """Write the ``.project.json`` marker file into ``folder_path``.

    The file contains a JSON object with two keys: ``project_slug`` (the
    given slug, stripped of surrounding whitespace and lower-cased) and
    ``created_at`` (the current naive local time in ISO 8601 format). The
    file is encoded as UTF-8 and non-ASCII characters are written unescaped.
    """
    normalized_slug = project_slug.strip().lower()
    created_at = datetime.now().isoformat()
    meta_file = folder_path.joinpath(".project.json")
    payload = json.dumps(
        {"project_slug": normalized_slug, "created_at": created_at},
        ensure_ascii=False,
    )
    meta_file.write_text(payload, encoding="utf-8")
|