- New: src/rag/engine/ — in-process hybrid search (FTS5 BM25 + sqlite-vec + LLM rerank) - New: src/rag/qmd/ — compatibility layer (qmd_query, qmd_chat, qmd_chat_stream, qmd_index_*) - New: src/ingest/stub_writer.py — .md stubs for binary files (videos, archives) - New: scripts/deploy.sh + scripts/pull_models.sh + Makefile + .env.example - Removed: LightRAG, sentence-transformers embedding via separate package, rag_standalone/ - Removed: @nousresearch/qmd npm dep (package not published); Node.js from Dockerfile - Updated: tests/ (46 passed), docker-compose, .dockerignore, config.yaml, README Engine: in-process Python (no daemon, no npm), sentence-transformers 384-dim, RRF fusion (k=60), BM25 + vector with numpy fallback. WebSocket API unchanged. Deploy: 'git clone' + 'make init' + 'make pull-models MODELS_SOURCE=...' + 'make up'. Models (5.83 GB) live outside git; pulled via rsync from dev host.
77 lines
2.3 KiB
Python
77 lines
2.3 KiB
Python
"""Org-scoped filesystem paths."""
|
|
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Top-level storage roots (relative paths).
DATA_ROOT = Path("data")
UPLOAD_ROOT = Path("uploads")
PROCESSED_ROOT = Path("processed")

# Names of the per-organization subdirectories appended by the helpers below.
RAG_CACHE_DIRNAME = "lightrag_caches"  # legacy cache location, see org_rag_index_dir
QMD_COLLECTIONS_DIRNAME = "qmd_collections"
MEETINGS_DIRNAME = "meetings"
DOCUMENTS_DIRNAME = "documents"
|
|
|
|
|
|
def org_upload_dir(org_slug: str, user_id: int) -> Path:
    """Return the upload directory of a user inside an organization.

    The directory ``UPLOAD_ROOT/<org_slug>/<user_id>`` is created, together
    with any missing parents, if it does not exist yet.
    """
    target = UPLOAD_ROOT.joinpath(org_slug, str(user_id))
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def org_meetings_dir(org_slug: str) -> Path:
    """Return the meetings directory of an organization.

    The directory ``PROCESSED_ROOT/<org_slug>/MEETINGS_DIRNAME`` is created,
    together with any missing parents, if it does not exist yet.
    """
    target = PROCESSED_ROOT.joinpath(org_slug, MEETINGS_DIRNAME)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def org_rag_index_dir(org_slug: str) -> Path:
    """Legacy: return the organization's ``lightrag_caches`` directory.

    Deprecated and kept for migration only. The directory
    ``PROCESSED_ROOT/<org_slug>/RAG_CACHE_DIRNAME`` is created, together with
    any missing parents, if it does not exist yet.
    """
    target = PROCESSED_ROOT.joinpath(org_slug, RAG_CACHE_DIRNAME)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def org_qmd_root(org_slug: str) -> Path:
    """Return the root of an organization's qmd collections.

    By default this is ``processed/<org>/qmd_collections/``. When the
    ``QMD_COLLECTION_ROOT`` environment variable is set to a non-empty value,
    that location replaces ``PROCESSED_ROOT`` as the base directory. The
    resulting directory is created, together with any missing parents, if it
    does not exist yet.
    """
    override = os.environ.get("QMD_COLLECTION_ROOT")
    if override:
        root = Path(override)
    else:
        root = PROCESSED_ROOT
    target = root.joinpath(org_slug, QMD_COLLECTIONS_DIRNAME)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def org_documents_dir(org_slug: str) -> Path:
    """Return the documents directory of an organization.

    The directory ``PROCESSED_ROOT/<org_slug>/DOCUMENTS_DIRNAME`` is created,
    together with any missing parents, if it does not exist yet.
    """
    target = PROCESSED_ROOT.joinpath(org_slug, DOCUMENTS_DIRNAME)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def resolve_document_path(org_slug: str, rel_path: str) -> Path:
    """Resolve a relative path under the org documents dir; reject traversal.

    Args:
        org_slug: Organization slug whose documents directory is the base.
        rel_path: Path to resolve relative to that directory.

    Returns:
        The fully resolved path, located at or below the documents directory.

    Raises:
        ValueError: If the resolved path lies outside the documents directory
            (e.g. via ``..`` components or an absolute ``rel_path``).
    """
    base = org_documents_dir(org_slug).resolve()
    full = (base / rel_path).resolve()
    try:
        # Compare path components, not raw strings: a string-prefix test would
        # wrongly accept sibling directories such as ``documents_evil``.
        full.relative_to(base)
    except ValueError:
        raise ValueError("Invalid path") from None
    return full
|
|
|
|
|
|
def resolve_meeting_path(org_slug: str, rel_path: str) -> Path:
    """Resolve a relative path under the org meetings dir; reject traversal.

    Args:
        org_slug: Organization slug whose meetings directory is the base.
        rel_path: Path to resolve relative to that directory.

    Returns:
        The fully resolved path, located at or below the meetings directory.

    Raises:
        ValueError: If the resolved path lies outside the meetings directory
            (e.g. via ``..`` components or an absolute ``rel_path``).
    """
    base = org_meetings_dir(org_slug).resolve()
    full = (base / rel_path).resolve()
    try:
        # Compare path components, not raw strings: a string-prefix test would
        # wrongly accept sibling directories such as ``meetings_old``.
        full.relative_to(base)
    except ValueError:
        raise ValueError("Invalid path") from None
    return full
|
|
|
|
|
|
def write_folder_project_meta(folder_path: Path, project_slug: str) -> None:
    """Write a ``.project.json`` metadata file into ``folder_path``.

    The file holds the project slug (stripped and lower-cased) and the
    creation time as an ISO-8601 string taken from ``datetime.now()``.
    An existing file is overwritten.
    """
    normalized_slug = project_slug.strip().lower()
    created_at = datetime.now().isoformat()
    payload = json.dumps(
        {"project_slug": normalized_slug, "created_at": created_at},
        ensure_ascii=False,
    )
    meta_file = folder_path / ".project.json"
    meta_file.write_text(payload, encoding="utf-8")
|