"""Перегенерация пустых или отсутствующих summary для уже обработанных совещаний.""" import argparse import asyncio import json import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) from backend.paths import PROCESSED_ROOT, MEETINGS_DIRNAME from src.config import load_config, resolve_opencode_credentials from src.rag.formatter import build_meeting_text_only, format_summary_markdown from src.rag.parser import classify_meeting, generate_meeting_brief def _summary_body(summary_path: Path) -> str: if not summary_path.exists(): return "" text = summary_path.read_text(encoding="utf-8") if "## Суть" not in text: return "" return text.split("## Суть", 1)[-1].strip() def _find_meeting_jobs(org_slug: str) -> list[dict]: meetings_dir = PROCESSED_ROOT / org_slug / MEETINGS_DIRNAME if not meetings_dir.exists(): return [] jobs = [] for folder in sorted(meetings_dir.iterdir()): if not folder.is_dir(): continue segments_files = list(folder.glob("*_segments.json")) if not segments_files: continue segments_path = segments_files[0] stem = segments_path.name[: -len("_segments.json")] summary_path = folder / f"{stem}_summary.md" jobs.append({ "folder": folder, "stem": stem, "segments_path": segments_path, "summary_path": summary_path, "display_name": f"{stem}.webm", }) return jobs async def _regenerate_job(job: dict, config: dict, dry_run: bool) -> bool: body = _summary_body(job["summary_path"]) if body: return False segments = json.loads(job["segments_path"].read_text(encoding="utf-8")) meeting_text = build_meeting_text_only(segments) rag_cfg = config.get("rag", {}) api_key, base_url = resolve_opencode_credentials(config) meta_path = job["folder"] / ".project.json" project = "unknown" if meta_path.exists(): try: project = json.loads(meta_path.read_text(encoding="utf-8")).get("project_slug", project) except Exception: pass if not api_key: print(f"[skip] {job['folder'].name}: нет API-ключа") return False if dry_run: print(f"[dry-run] {job['folder'].name}") return True sections = rag_cfg.get("sections", ["Общие вопросы"]) metadata = await classify_meeting( text=meeting_text, project=project, sections=sections, api_key=api_key, base_url=base_url, model=rag_cfg.get("index_model", "mimo-v2.5-free"), chunk_size=int(rag_cfg.get("classify_chunk_size", 7000)), ) brief = await generate_meeting_brief( text=meeting_text, metadata=metadata, api_key=api_key, base_url=base_url, model=rag_cfg.get("summary_model", "deepseek-v4-flash-free"), chunk_size=int(rag_cfg.get("summary_chunk_size", 10000)), ) if not (brief or "").strip(): print(f"[warn] {job['folder'].name}: LLM вернул пустой brief") return False summary_md = format_summary_markdown(metadata, brief, job["display_name"]) job["summary_path"].write_text(summary_md, encoding="utf-8") print(f"[ok] {job['folder'].name} ({len(brief)} символов)") return True async def main(): parser = argparse.ArgumentParser(description="Backfill пустых summary совещаний") parser.add_argument("--org", default="merakom", help="org_slug") parser.add_argument("--dry-run", action="store_true", help="Только показать, что будет обновлено") args = parser.parse_args() config = load_config() jobs = _find_meeting_jobs(args.org) updated = 0 for job in jobs: if await _regenerate_job(job, config, args.dry_run): updated += 1 print(f"Готово: {updated} из {len(jobs)} совещаний") if __name__ == "__main__": asyncio.run(main())