#!/usr/bin/env bash
# scripts/pull_models.sh — fetch models (rsync from a source host, with a download fallback).
#
# Usage:
#   MODELS_SOURCE=user@host:/opt/transcription/models/ bash scripts/pull_models.sh
#   bash scripts/pull_models.sh   # if MODELS_SOURCE is not set, tries to download from the internet
#
# Environment variables:
#   MODELS_SOURCE  — rsync source (user@host:path) — the preferred method
#   MODELS_DIR     — local directory (defaults to models/ under the project root)
#   FORCE_DOWNLOAD — if "1", ignores the cache and fetches again

# Strict mode: abort on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The project root is the parent of the script directory.
PROJECT_ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd)"
# Local model store; a MODELS_DIR already set in the environment wins over the default.
MODELS_DIR="${MODELS_DIR:-$PROJECT_ROOT/models}"
# Directory whose presence is treated as "models are already fetched".
MARKER="$MODELS_DIR/huggingface/hub/models--Systran--faster-whisper-large-v3"

# If the models are already present, exit early — unless the caller asks for
# a fresh fetch with FORCE_DOWNLOAD=1.
if [[ -d "$MARKER" && "${FORCE_DOWNLOAD:-0}" != "1" ]]; then
  echo "[pull_models] Models already present at $MODELS_DIR — skipping"
  exit 0
fi

# 1. Preferred path: rsync from MODELS_SOURCE when it is set and non-empty.
if [[ -n "${MODELS_SOURCE:-}" ]]; then
  # Fail with a clear message instead of a bare "command not found".
  if ! command -v rsync >/dev/null 2>&1; then
    echo "[pull_models] ERROR: rsync not found in PATH" >&2
    exit 1
  fi

  echo "[pull_models] Pulling models from $MODELS_SOURCE ..."

  # Drop one trailing slash so that the documented form ".../models/" does not
  # produce ".../models//huggingface/" below.
  models_src="${MODELS_SOURCE%/}"

  mkdir -p "$MODELS_DIR/huggingface"

  # Required payload: a failure here aborts the script (set -e).
  rsync -avz --progress "$models_src/huggingface/" "$MODELS_DIR/huggingface/"

  # Optional payload: best-effort by design, so a failure only yields a warning.
  # rsync's own stderr stays suppressed, as before.
  if ! rsync -avz --progress "$models_src/nltk_data/" "$MODELS_DIR/nltk_data/" 2>/dev/null; then
    echo "[pull_models] WARNING: could not sync nltk_data from $MODELS_SOURCE (continuing)" >&2
  fi

  # The early-exit cache check keys off this directory; if it is still missing
  # the next run will pull again, so tell the user now.
  if [[ ! -d "$MARKER" ]]; then
    echo "[pull_models] WARNING: expected directory not found after sync: $MARKER" >&2
  fi

  echo "[pull_models] Done."
  exit 0
fi

# 2. Fallback: download from the internet (reached only when no rsync source is set).
echo "[pull_models] MODELS_SOURCE not set, falling back to download_models.py ..."
cd "$PROJECT_ROOT" || {
  echo "[pull_models] ERROR: cannot cd to $PROJECT_ROOT" >&2
  exit 1
}

# Locate a Python interpreter: prefer `python`, fall back to `python3` on
# systems that do not ship an unversioned binary.
python_bin=""
for candidate in python python3; do
  if command -v "$candidate" >/dev/null 2>&1; then
    python_bin="$candidate"
    break
  fi
done
if [[ -z "$python_bin" ]]; then
  echo "[pull_models] ERROR: python not found in PATH" >&2
  exit 1
fi

# Install the minimal set of packages for the download step. This is
# best-effort: a failed install only produces a warning.
# `-m pip` guarantees the packages land in the interpreter that runs the
# download script; pip's stderr stays suppressed, as before.
for pkg in \
  "openai-whisper" \
  "whisperx @ git+https://github.com/m-bain/whisperx.git"; do
  if ! "$python_bin" -m pip install --quiet --no-cache-dir "$pkg" 2>/dev/null; then
    echo "[pull_models] WARNING: failed to install '$pkg' (continuing)" >&2
  fi
done

# Run the downloader; on failure point the user at the rsync alternative.
if ! "$python_bin" scripts/download_models.py; then
  echo "[pull_models] ERROR: download_models.py failed." >&2
  echo "Hint: set MODELS_SOURCE=user@host:/opt/transcription/models/ and retry." >&2
  exit 1
fi

# NOTE(review): assumes download_models.py writes into $MODELS_DIR — confirm
# against that script; the variable is not passed to it explicitly here.
echo "[pull_models] Downloaded to $MODELS_DIR"