transcription/backend/main.py
keboss-m 8df14e3102 Add multi-tenant auth with org projects, roles, and personal workspaces.
JWT login, org-scoped storage and RAG, admin/director/user roles, user-owned projects, login UI, and legacy data migration.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-01 18:54:25 +03:00

401 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""FastAPI backend для сервиса транскрибации."""
import json
import sys
from contextlib import asynccontextmanager
from pathlib import Path
from typing import List, Optional
from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from backend.auth.deps import get_current_user, get_user_from_token, require_admin
from backend.auth.models import UserContext
from backend.auth.routes import admin_router, router as auth_router
from backend.auth import database as auth_db
from backend.auth.service import ensure_project_access, list_accessible_projects
from backend.paths import org_meetings_dir, org_rag_index_dir, resolve_meeting_path
from backend.queue import (
delete_folder,
get_all_tasks,
get_download_path,
get_processed_tree,
get_queue_info,
get_task_status,
read_file_content,
save_upload,
set_progress_callback,
start_workers,
stop_workers,
)
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.config import load_config, resolve_opencode_credentials
from src.rag.formatter import format_global_document
from src.rag.indexer import get_project_names, index_meeting
from src.rag.query import rag_chat
# Directory next to this module that holds the front-end assets
# (index.html and login.html are read from here, and it is mounted at /static).
STATIC_DIR = Path(__file__).parent / "static"
class ConnectionManager:
    """Registry of open WebSocket connections and the user bound to each one.

    Connections are stored as ``(websocket, user)`` pairs. Sockets are
    compared by identity, so the same object passed to ``connect`` must be
    passed to ``disconnect`` / ``set_user``.
    """

    def __init__(self):
        # ``user`` is None until a user has been attached to the socket.
        self.active_connections: list[tuple[WebSocket, Optional[UserContext]]] = []

    async def connect(self, websocket: WebSocket, user: Optional[UserContext] = None):
        """Accept the WebSocket handshake and start tracking the connection."""
        await websocket.accept()
        self.active_connections.append((websocket, user))

    def disconnect(self, websocket: WebSocket):
        """Stop tracking ``websocket``; a socket that is not tracked is ignored."""
        self.active_connections = [
            (ws, u) for ws, u in self.active_connections if ws is not websocket
        ]

    def set_user(self, websocket: WebSocket, user: UserContext):
        """Replace the user associated with an already tracked ``websocket``."""
        self.active_connections = [
            (ws, user if ws is websocket else u) for ws, u in self.active_connections
        ]

    async def broadcast(self, message: dict):
        """Send ``message`` as JSON to every tracked connection.

        A connection whose send raises is dropped from the registry instead
        of being retried on every later broadcast.

        NOTE(review): the message goes to all connections regardless of the
        user attached to them — confirm that progress messages are safe to
        share across organisations.
        """
        # Iterate over a snapshot: the registry may change while we await.
        for conn, _user in list(self.active_connections):
            try:
                await conn.send_json(message)
            except Exception:
                # A failed send means the peer is gone or the socket is
                # unusable; forget it so it is not hit again.
                self.disconnect(conn)
# Module-level registry shared by the WebSocket endpoint and the queue callback.
manager = ConnectionManager()
# Hand the registry's broadcast coroutine to the queue module as its progress
# callback (presumably invoked by the workers on task progress — see backend.queue).
set_progress_callback(manager.broadcast)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: prepare auth storage and run the queue workers.

    Startup loads the configuration, initialises and bootstraps the auth
    database, migrates legacy data into the bootstrap organisation and
    starts the worker pool. Shutdown stops the workers.

    Args:
        app: The FastAPI application (unused; required by the lifespan protocol).
    """
    config = load_config()

    auth_db.init_db(config)
    auth_db.bootstrap_from_config(config)
    org_slug = config.get("auth", {}).get("bootstrap", {}).get("org_slug", "merakom")
    auth_db.migrate_legacy_data(org_slug)

    queue_cfg = config.get("queue", {})
    transcribe_workers = int(queue_cfg.get("transcribe_workers", 2))
    postprocess_workers = int(queue_cfg.get("postprocess_workers", 1))

    print("🚀 Запуск рабочих процессов...")
    start_workers(transcribe_workers=transcribe_workers, postprocess_workers=postprocess_workers)
    try:
        yield
    finally:
        # Runs even when an exception or cancellation is thrown in at the
        # yield, so the workers are never left running after the app is gone.
        print("🛑 Остановка рабочих процессов...")
        stop_workers()
# Application instance; startup and shutdown work is delegated to ``lifespan``.
app = FastAPI(title="Transcription Service", version="2.0.0", lifespan=lifespan)
# NOTE(review): wildcard origins together with allow_credentials=True allows
# credentialed cross-origin requests from any site; FastAPI's CORS docs advise
# listing explicit origins when credentials are enabled — confirm this is intended.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Routes defined in backend.auth.routes (authentication and administration).
app.include_router(auth_router)
app.include_router(admin_router)
def _org_index_dir(user: UserContext) -> Path:
    """Return the RAG index directory of the organisation ``user`` belongs to."""
    slug = user.org_slug
    return org_rag_index_dir(slug)
async def _list_rag_project_slugs(user: UserContext) -> List[str]:
    """List the projects found in the user's organisation index, filtered by
    ``user.filter_projects``."""
    index_dir = _org_index_dir(user)
    indexed_names = await get_project_names(index_dir)
    return user.filter_projects(indexed_names)
async def _rag_chat_for_user(user: UserContext, question: str, history: list, project_name: Optional[str], mode: str):
    """Run a RAG chat query on behalf of ``user`` after checking access.

    Args:
        user: The authenticated caller.
        question: The question to ask.
        history: Previous chat turns, passed through to ``rag_chat``.
        project_name: Project to query; a falsy value means a global query.
        mode: Retrieval mode, passed through to ``rag_chat``.

    Returns:
        Whatever ``rag_chat`` returns.

    Raises:
        HTTPException: 403 when a global query is requested and
            ``user.can_global_search()`` is false.
    """
    if not project_name:
        # Global query: only allowed for users with global-search rights.
        if not user.can_global_search():
            raise HTTPException(status_code=403, detail="Глобальный поиск доступен только администратору")
    else:
        ensure_project_access(user, project_name)

    settings = load_config()
    rag_settings = settings.get("rag", {})
    api_key, base_url = resolve_opencode_credentials(settings)

    index_dir = _org_index_dir(user)
    chat_model = rag_settings.get("chat_model", "deepseek-v4-flash-free")
    index_model = rag_settings.get("index_model", "mimo-v2.5-free")

    return await rag_chat(
        question=question,
        working_dir_base=index_dir,
        history=history,
        api_key=api_key,
        project_name=project_name,
        base_url=base_url,
        chat_model=chat_model,
        mode=mode,
        index_model=index_model,
    )
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve ``index.html`` from the static directory, or a placeholder heading
    when the file does not exist."""
    page = STATIC_DIR / "index.html"
    if not page.exists():
        return "<h1>Transcription Service</h1>"
    return page.read_text(encoding="utf-8")
@app.get("/login", response_class=HTMLResponse)
async def login_page():
    """Serve ``login.html`` from the static directory, or a placeholder heading
    when the file does not exist."""
    page = STATIC_DIR / "login.html"
    if not page.exists():
        return "<h1>Login page missing</h1>"
    return page.read_text(encoding="utf-8")
@app.post("/upload")
async def upload_file(
    file: UploadFile = File(...),
    project: str = Form(...),
    user: UserContext = Depends(get_current_user),
):
    """Queue a single uploaded file for processing in ``project``.

    Returns the new task id together with the current queue info.

    Raises:
        HTTPException: 403 when ``save_upload`` raises ``PermissionError``.
    """
    payload = await file.read()
    stored_name = file.filename or "upload.bin"
    try:
        task_id, _ = await save_upload(payload, stored_name, user, project)
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail=str(exc)) from exc

    response = {
        "task_id": task_id,
        "file": file.filename,
        "project": project,
        "status": "queued",
        "message": "Файл добавлен в очередь обработки",
        "queue": get_queue_info(user),
    }
    return response
@app.post("/upload-batch")
async def upload_batch(
    files: List[UploadFile] = File(...),
    project: str = Form(...),
    user: UserContext = Depends(get_current_user),
):
    """Queue several uploaded files for processing in ``project``.

    Files are saved one after another; the response lists one task entry
    per saved file plus the current queue info.

    Raises:
        HTTPException: 403 as soon as ``save_upload`` raises
            ``PermissionError`` for any file.
    """
    if not files:
        return {"error": "Не переданы файлы", "uploaded": 0, "tasks": []}

    results = []
    for upload in files:
        payload = await upload.read()
        stored_name = upload.filename or "upload.bin"
        try:
            task_id, _ = await save_upload(payload, stored_name, user, project)
        except PermissionError as exc:
            raise HTTPException(status_code=403, detail=str(exc)) from exc
        entry = {"task_id": task_id, "file": upload.filename, "project": project, "status": "queued"}
        results.append(entry)

    return {
        "uploaded": len(results),
        "tasks": results,
        "queue": get_queue_info(user),
        "message": f"{len(results)} файл(ов) добавлено в очередь",
    }
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Authenticated WebSocket channel for task lists, the file tree and RAG queries.

    The JWT is read from the ``token`` query parameter; when it does not
    resolve to a user the socket is closed with application code 4401.
    Each incoming text frame must be a JSON object with an ``action`` key:

    * ``get_tasks`` replies with a ``tasks_list`` message
    * ``get_tree`` replies with a ``file_tree`` message
    * ``rag_query`` and ``rag_query_global`` are delegated to ``_handle_rag_query_ws``

    Frames that are not valid JSON objects get an ``error`` reply and the
    session continues. Unknown actions are ignored.
    """
    import logging

    token = websocket.query_params.get("token")
    user = get_user_from_token(token)
    if not user:
        await websocket.close(code=4401)
        return

    await manager.connect(websocket, user)
    try:
        while True:
            data = await websocket.receive_text()
            try:
                msg = json.loads(data)
            except ValueError:
                msg = None
            if not isinstance(msg, dict):
                # One bad frame should not tear down the whole session.
                await websocket.send_json({"type": "error", "error": "Invalid message"})
                continue

            action = msg.get("action")
            if action == "get_tasks":
                await websocket.send_json({
                    "type": "tasks_list",
                    "tasks": get_all_tasks(user),
                    "queue": get_queue_info(user),
                })
            elif action == "get_tree":
                await websocket.send_json({
                    "type": "file_tree",
                    "tree": get_processed_tree(user),
                })
            elif action in ("rag_query", "rag_query_global"):
                await _handle_rag_query_ws(websocket, msg, user)
    except WebSocketDisconnect:
        # Normal end of session: the client went away.
        pass
    except Exception:
        # Still ends the session without re-raising, but leaves a trace.
        logging.getLogger(__name__).exception("WebSocket session terminated by an unexpected error")
    finally:
        # Also covers task cancellation, which the except clauses do not catch.
        manager.disconnect(websocket)
@app.get("/api/tasks")
async def api_tasks(user: UserContext = Depends(get_current_user)):
    """Return the tasks and queue info that ``get_all_tasks`` / ``get_queue_info``
    report for the caller."""
    tasks = get_all_tasks(user)
    queue_state = get_queue_info(user)
    return {"tasks": tasks, "queue": queue_state}
@app.get("/api/tasks/{task_id}")
async def api_task(task_id: str, user: UserContext = Depends(get_current_user)):
    """Return the status of one task, or an ``error`` payload when
    ``get_task_status`` yields nothing for it."""
    status = get_task_status(task_id, user)
    return status if status else {"error": "Task not found"}
@app.get("/api/files")
async def api_files(user: UserContext = Depends(get_current_user)):
    """Return the processed-files tree reported by ``get_processed_tree`` for the caller."""
    tree = get_processed_tree(user)
    return {"tree": tree}
@app.get("/api/files/content")
async def api_file_content(path: str, user: UserContext = Depends(get_current_user)):
    """Return the text content of the file at ``path``.

    Raises:
        HTTPException: 403 when ``read_file_content`` raises ``PermissionError``.

    Any other failure is reported as an ``error`` payload.
    """
    try:
        text = read_file_content(user, path)
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail=str(exc)) from exc
    except Exception as exc:
        return {"error": str(exc)}
    return {"content": text, "path": path}
@app.get("/api/files/download")
async def api_download(path: str, user: UserContext = Depends(get_current_user)):
    """Send the file at ``path`` as a download named after the file.

    Raises:
        HTTPException: 403 when ``get_download_path`` raises ``PermissionError``.

    Any other failure is reported as an ``error`` payload.
    """
    try:
        target = get_download_path(user, path)
        response = FileResponse(target, filename=target.name)
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail=str(exc)) from exc
    except Exception as exc:
        return {"error": str(exc)}
    return response
@app.delete("/api/folders/{folder_name:path}")
async def api_delete_folder(folder_name: str, user: UserContext = Depends(get_current_user)):
    """Delete the folder ``folder_name`` on behalf of the caller.

    Raises:
        HTTPException: 403 when ``delete_folder`` raises ``PermissionError``.

    Any other failure is reported as an ``error`` payload.
    """
    try:
        delete_folder(user, folder_name)
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail=str(exc)) from exc
    except Exception as exc:
        return {"error": str(exc)}
    return {"deleted": folder_name}
@app.get("/api/rag/projects")
async def api_rag_projects(user: UserContext = Depends(get_current_user)):
    """List project slugs available to the caller for RAG queries.

    Merges the slugs from ``list_accessible_projects`` with the projects
    found in the organisation index; for non-admins the merged list is
    passed through ``user.filter_projects`` once more. Any failure yields
    an ``error`` payload with an empty project list.
    """
    try:
        slugs = set()
        for project in list_accessible_projects(user):
            slugs.add(project["slug"])
        indexed = await _list_rag_project_slugs(user)
        merged = sorted(slugs.union(indexed))
        if user.is_admin:
            return {"projects": merged}
        return {"projects": user.filter_projects(merged)}
    except Exception as exc:
        return {"error": str(exc), "projects": []}
@app.post("/api/rag/query")
async def api_rag_query(payload: dict, user: UserContext = Depends(get_current_user)):
    """Answer a RAG question, optionally scoped to ``payload["project"]``.

    Reads ``question``, ``history``, ``project`` and ``mode`` from the
    payload (defaults: empty question, empty history, no project, ``hybrid``).

    Raises:
        HTTPException: re-raised from the access checks, or 403 when a
            ``PermissionError`` is raised.

    Any other failure is reported as an ``error`` payload.
    """
    try:
        question = payload.get("question", "")
        history = payload.get("history", [])
        project = payload.get("project")
        mode = payload.get("mode", "hybrid")
        result = await _rag_chat_for_user(user, question, history, project, mode)
        return {key: result[key] for key in ("answer", "context", "project")}
    except HTTPException:
        raise
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail=str(exc)) from exc
    except Exception as exc:
        return {"error": str(exc)}
@app.post("/api/rag/query-global")
async def api_rag_query_global(payload: dict, user: UserContext = Depends(get_current_user)):
    """Answer a RAG question across all projects of the caller's organisation.

    Reads ``question``, ``history`` and ``mode`` from the payload
    (defaults: empty question, empty history, ``hybrid``); any ``project``
    in the payload is ignored.

    Raises:
        HTTPException: re-raised from the access checks, or 403 when a
            ``PermissionError`` is raised (same mapping as the scoped
            query endpoint).

    Any other failure is reported as an ``error`` payload.
    """
    try:
        result = await _rag_chat_for_user(
            user,
            payload.get("question", ""),
            payload.get("history", []),
            None,
            payload.get("mode", "hybrid"),
        )
        return {"answer": result["answer"], "context": result["context"], "project": None}
    except HTTPException:
        raise
    except PermissionError as e:
        # Report denied access as 403, consistent with the other endpoints,
        # instead of a 200 response carrying an error payload.
        raise HTTPException(status_code=403, detail=str(e)) from e
    except Exception as e:
        return {"error": str(e)}
@app.post("/api/rag/index/{folder_name:path}")
async def api_rag_index_folder(folder_name: str, user: UserContext = Depends(get_current_user)):
    """Re-index the protocol of a meeting folder into the organisation's RAG index.

    Resolves ``folder_name`` inside the caller's organisation, looks up the
    project the folder belongs to, checks access, and indexes the first
    ``*.txt`` file (in sorted name order) found directly in the folder.

    Returns:
        ``{"indexed": folder_name, "project": project}`` on success, or an
        ``error`` payload when the folder, its project metadata or a
        ``.txt`` file is missing, or when any other failure occurs.

    Raises:
        HTTPException: 403 when a ``PermissionError`` is raised.
    """
    try:
        folder_path = resolve_meeting_path(user.org_slug, folder_name)
        if not folder_path.exists():
            return {"error": "Folder not found"}

        from backend.queue import _folder_project_slug

        project = _folder_project_slug(folder_path.name, org_meetings_dir(user.org_slug))
        if not project:
            return {"error": "Project metadata not found"}
        ensure_project_access(user, project)

        # glob() yields entries in no guaranteed order; sort so the same file
        # is chosen on every run when the folder holds several .txt files.
        txt_files = sorted(folder_path.glob("*.txt"))
        if not txt_files:
            return {"error": "No .txt protocol found in folder"}
        doc_text = txt_files[0].read_text(encoding="utf-8")

        config = load_config()
        rag_cfg = config.get("rag", {})
        api_key, base_url = resolve_opencode_credentials(config)

        metadata = {"project": project, "section": "Общие вопросы", "topic": "Переиндексация"}
        global_doc_text = format_global_document(doc_text, metadata)
        await index_meeting(
            doc_text=doc_text,
            global_doc_text=global_doc_text,
            project_name=project,
            working_dir_base=_org_index_dir(user),
            model=rag_cfg.get("index_model", "mimo-v2.5-free"),
            api_key=api_key,
            base_url=base_url,
        )
        return {"indexed": folder_name, "project": project}
    except PermissionError as e:
        raise HTTPException(status_code=403, detail=str(e)) from e
    except Exception as e:
        return {"error": str(e)}
async def _handle_rag_query_ws(websocket: WebSocket, msg: dict, user: UserContext):
    """Run a RAG query received over the WebSocket and send back the outcome.

    On success a ``rag_response`` message is sent; on any failure a
    ``rag_error`` message carrying the error text is sent instead. For the
    ``rag_query_global`` action the project in the message is ignored.
    """
    try:
        is_global = msg.get("action") == "rag_query_global"
        project = None if is_global else msg.get("project")
        question = msg.get("question", "")
        history = msg.get("history", [])
        mode = msg.get("mode", "hybrid")

        result = await _rag_chat_for_user(user, question, history, project, mode)

        reply = {
            "type": "rag_response",
            "answer": result["answer"],
            "context": result["context"],
            "project": result["project"],
        }
        await websocket.send_json(reply)
    except HTTPException as exc:
        await websocket.send_json({"type": "rag_error", "error": exc.detail})
    except Exception as exc:
        # Includes PermissionError, which is reported the same way.
        await websocket.send_json({"type": "rag_error", "error": str(exc)})
@app.get("/api/health")
async def health():
    """Health probe: always answers with status ``ok``."""
    body = {"status": "ok"}
    return body
# Serve the contents of STATIC_DIR under the /static URL prefix.
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")