transcription/scripts/setup_local_models.ps1
keboss-m fee9b9acb1 Add RAG, summary pipeline, and split transcribe/postprocess queue.
Separate ASR (2 workers) from summary/RAG post-processing, add LightRAG chat API, batch upload fixes, and local model mounts for Docker deployment.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-01 17:40:58 +03:00

58 lines
2.8 KiB
PowerShell

# Prepares the local models/ folder for offline deployment.
# Usage: .\scripts\setup_local_models.ps1

# Turn non-terminating cmdlet errors into terminating ones so the script halts early.
$ErrorActionPreference = "Stop"

# The project root is the parent of the directory that contains this script.
$ProjectRoot = Split-Path -Path $PSScriptRoot -Parent
$ModelsDir = Join-Path -Path $ProjectRoot -ChildPath "models"

# The NLTK backup is looked up one level above the project root (a sibling of the project folder).
$ProjectParent = Split-Path -Path $ProjectRoot -Parent
$NltkBackup = Join-Path -Path $ProjectParent -ChildPath ".nltk_data_backup"

Write-Host "==> Папка моделей: $ModelsDir"

# -Force makes the call succeed when the directory already exists.
$null = New-Item -ItemType Directory -Force -Path $ModelsDir
# 1. NLTK data (punkt, punkt_tab)
#    Copies the tokenizer data from the backup folder into models\nltk_data,
#    unless the punkt tokenizer directory is already present there.
# NOTE(review): only tokenizers\punkt is checked; punkt_tab is not verified — confirm this is intended.
$NltkTarget = Join-Path $ModelsDir "nltk_data"
if (Test-Path "$NltkTarget\tokenizers\punkt") {
    Write-Host "==> NLTK data уже на месте"
}
elseif (Test-Path $NltkBackup) {
    Write-Host "==> Копирование NLTK data из $NltkBackup"
    $null = New-Item -ItemType Directory -Force -Path $NltkTarget
    # Copy the backup's contents (not the backup folder itself) into the target.
    Copy-Item -Path "$NltkBackup\*" -Destination $NltkTarget -Recurse -Force
}
else {
    # No backup available: warn and continue with the remaining steps.
    Write-Warning "NLTK backup не найден: $NltkBackup"
}
# 2. HuggingFace models extracted from the Docker image (when the image is available).
#    Skipped when the faster-whisper marker directory already exists under
#    models\huggingface\hub. Otherwise a temporary container is created from the
#    image and its /root/.cache/huggingface contents are copied to models\huggingface.
$HubTarget = Join-Path $ModelsDir "huggingface\hub"
$Marker = Join-Path $HubTarget "models--Systran--faster-whisper-large-v3"
if (-not (Test-Path $Marker)) {
    $Image = "transcription-transcription:latest"
    $HfTarget = Split-Path $HubTarget -Parent

    # $ErrorActionPreference = "Stop" does not turn a non-zero exit code of a native
    # command into an error, and in Windows PowerShell stderr output captured by a
    # 2> redirection can surface as a terminating error under "Stop". Relax the
    # preference around the docker calls and check $LASTEXITCODE explicitly instead.
    $PrevErrorAction = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $Exists = docker images -q $Image 2>$null
        if ($Exists) {
            Write-Host "==> Извлечение моделей из Docker-образа $Image (~5 GB, несколько минут)..."

            # Remove a stale container left over from an interrupted run (best effort).
            docker rm -f model_extract 2>$null | Out-Null

            docker create --name model_extract $Image | Out-Null
            if ($LASTEXITCODE -ne 0) {
                throw "Не удалось создать контейнер из образа $Image (код выхода $LASTEXITCODE)"
            }

            try {
                New-Item -ItemType Directory -Force -Path $HfTarget -ErrorAction Stop | Out-Null

                # The trailing "/." copies the CONTENTS of the source directory. Without it,
                # docker cp places the source directory inside an already existing
                # destination, producing models\huggingface\huggingface\hub.
                docker cp "model_extract:/root/.cache/huggingface/." $HfTarget
                if ($LASTEXITCODE -ne 0) {
                    throw "Не удалось скопировать модели из контейнера (docker cp, код выхода $LASTEXITCODE)"
                }
            }
            finally {
                # Always remove the temporary container, even when the copy failed.
                docker rm -f model_extract 2>$null | Out-Null
            }

            if (Test-Path $Marker) {
                Write-Host "==> Модели Whisper/Alignment/Diarization скопированы"
            }
            else {
                # The copy succeeded but the image did not contain the expected model cache.
                Write-Warning "После копирования не найден ожидаемый каталог: $Marker"
            }
        }
        else {
            Write-Warning "Образ $Image не найден. Сначала соберите: docker compose build"
            Write-Warning "Или запустите: python scripts/download_models.py (скачает модели из интернета)"
        }
    }
    finally {
        $ErrorActionPreference = $PrevErrorAction
    }
}
else {
    Write-Host "==> HuggingFace модели уже на месте"
}
# 3. Sentence-transformers for RAG: downloaded on first run; pre-loading is optional,
#    so nothing is done for it here.

# Final report: one line per top-level directory in models/ with its total file size in GB.
Write-Host ""
Write-Host "Готово. Структура:"
foreach ($dir in (Get-ChildItem $ModelsDir -Directory)) {
    # Unreadable entries are skipped silently instead of aborting the report.
    $bytes = (
        Get-ChildItem $dir.FullName -Recurse -File -ErrorAction SilentlyContinue |
            Measure-Object Length -Sum
    ).Sum
    Write-Host (" {0}: {1:N2} GB" -f $dir.Name, ($bytes / 1GB))
}
Write-Host ""
Write-Host "Запуск сервиса: docker compose up --build -d"