58 lines
2.8 KiB
PowerShell
58 lines
2.8 KiB
PowerShell
|
|
# Prepares the local models/ folder for offline deployment.
# Usage: .\scripts\setup_local_models.ps1
|
||
|
|
|
||
|
|
# Turn cmdlet errors into terminating errors so the script stops instead of
# continuing with a half-prepared models folder.
$ErrorActionPreference = "Stop"

# All paths are derived from this script's location: the project root is the
# parent of the script's folder, and the NLTK backup is looked up in the
# directory that contains the project root.
$ProjectRoot = Split-Path -Path $PSScriptRoot -Parent
$ModelsDir = Join-Path -Path $ProjectRoot -ChildPath "models"
$NltkBackup = Join-Path -Path (Split-Path -Path $ProjectRoot -Parent) -ChildPath ".nltk_data_backup"

Write-Host "==> Папка моделей: $ModelsDir"

# -Force makes this succeed when the folder already exists; the returned
# DirectoryInfo object is discarded to keep the console output clean.
$null = New-Item -ItemType Directory -Force -Path $ModelsDir
|
||
|
|
|
||
|
|
# 1. NLTK (punkt, punkt_tab)
# Seeds models\nltk_data from the backup folder. The presence of
# tokenizers\punkt inside the target is used as the "already done" marker;
# the backup location is only probed when that marker is missing.
$NltkTarget = Join-Path -Path $ModelsDir -ChildPath "nltk_data"
if (Test-Path "$NltkTarget\tokenizers\punkt") {
    Write-Host "==> NLTK data уже на месте"
}
elseif (Test-Path $NltkBackup) {
    Write-Host "==> Копирование NLTK data из $NltkBackup"
    $null = New-Item -ItemType Directory -Force -Path $NltkTarget
    # Copy the backup's contents (not the backup folder itself) into the target.
    Copy-Item -Path "$NltkBackup\*" -Destination $NltkTarget -Recurse -Force
}
else {
    Write-Warning "NLTK backup не найден: $NltkBackup"
}
|
||
|
|
|
||
|
|
# 2. HuggingFace models from the Docker image (if the image exists locally).
# Copies /root/.cache/huggingface out of a temporary container into
# models\huggingface. The faster-whisper folder under models\huggingface\hub
# is used as the "already extracted" marker.
$HubTarget = Join-Path $ModelsDir "huggingface\hub"
$Marker = Join-Path $HubTarget "models--Systran--faster-whisper-large-v3"
if (-not (Test-Path $Marker)) {
    $Image = "transcription-transcription:latest"
    # `docker images -q` prints the image ID when the image exists and nothing otherwise.
    $Exists = docker images -q $Image 2>$null
    if ($Exists) {
        Write-Host "==> Извлечение моделей из Docker-образа $Image (~5 GB, несколько минут)..."

        # Best-effort removal of a container left over from an interrupted run.
        # Docker reports a missing container on stderr; run this in a child scope
        # with 'Continue' so that redirected native stderr cannot be promoted to a
        # terminating error by the script-wide 'Stop' preference (seen on
        # Windows PowerShell 5.1).
        & {
            $ErrorActionPreference = "Continue"
            docker rm -f model_extract 2>$null | Out-Null
        }

        # $ErrorActionPreference does not react to native exit codes by default,
        # so every docker step that matters is checked through $LASTEXITCODE.
        docker create --name model_extract $Image | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "Не удалось создать контейнер из образа $Image (docker create, код выхода $LASTEXITCODE)"
        }

        try {
            $HfTarget = Split-Path $HubTarget -Parent
            New-Item -ItemType Directory -Force -Path $HfTarget | Out-Null

            # The trailing "/." makes docker copy the *contents* of the source
            # directory. Without it, because the destination already exists,
            # docker would create models\huggingface\huggingface\... and the
            # marker path above would never appear.
            docker cp "model_extract:/root/.cache/huggingface/." $HfTarget
            if ($LASTEXITCODE -ne 0) {
                throw "Не удалось скопировать модели из контейнера model_extract (docker cp, код выхода $LASTEXITCODE)"
            }
        }
        finally {
            # Always remove the temporary container, even when the copy failed.
            docker rm model_extract | Out-Null
        }

        Write-Host "==> Модели Whisper/Alignment/Diarization скопированы"
    }
    else {
        Write-Warning "Образ $Image не найден. Сначала соберите: docker compose build"
        Write-Warning "Или запустите: python scripts/download_models.py (скачает модели из интернета)"
    }
}
else {
    Write-Host "==> HuggingFace модели уже на месте"
}
|
||
|
|
|
||
|
|
# 3. Sentence-transformers for RAG: downloaded on first run (pre-loading is
# optional), so nothing is fetched here.

# Final report: one line per immediate subfolder of the models directory with
# the total size of all files beneath it, expressed in GB.
Write-Host ""
Write-Host "Готово. Структура:"
foreach ($Dir in Get-ChildItem -Path $ModelsDir -Directory) {
    # Unreadable entries are skipped silently rather than aborting the report.
    $TotalBytes = (
        Get-ChildItem -Path $Dir.FullName -Recurse -File -ErrorAction SilentlyContinue |
            Measure-Object -Property Length -Sum
    ).Sum
    Write-Host (" {0}: {1:N2} GB" -f $Dir.Name, ($TotalBytes / 1GB))
}
Write-Host ""
Write-Host "Запуск сервиса: docker compose up --build -d"
|