vad : add download-vad-model scripts (#3149)

* vad : add download-vad-model scripts

This commit adds a script to download VAD models.

* vad : add vad model download script for windows [no ci]

Refs: https://github.com/ggml-org/whisper.cpp/issues/3146
This commit is contained in:
Daniel Bevenius 2025-05-14 16:47:18 +02:00 committed by GitHub
parent 3882a099e1
commit 96d791ae61
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 204 additions and 0 deletions

View File

@ -0,0 +1,99 @@
@echo off
rem Download a VAD model that has already been converted to ggml format.
rem Usage: download-vad-model.cmd model [models_path]

rem Save the original working directory
set "orig_dir=%CD%"
rem Get the script directory
set "script_dir=%~dp0"
rem Resolve the script directory to a normalized full path. This is done here,
rem outside any parenthesized block, because %VAR% references inside ( ... )
rem are expanded when the whole block is parsed, not when each line executes.
for %%d in ("%~dp0.") do set "script_path=%%~fd"
rem Set the root path to be the parent directory of the script
for %%d in ("%~dp0..") do set "root_path=%%~fd"

rem Check if the script directory contains "\bin\" (case-insensitive).
rem Keep the ERRORLEVEL test directly after findstr so nothing runs in between.
echo %script_dir% | findstr /i "\\bin\\" >nul
if %ERRORLEVEL%==0 (
    rem If script is in a \bin\ directory, use the original working directory as default download path
    set "default_download_path=%orig_dir%"
) else (
    rem Otherwise, use script directory
    set "default_download_path=%script_path%"
)

rem Count number of arguments passed to script
set argc=0
for %%x in (%*) do set /A argc+=1

rem Names accepted as the first argument
set models=silero-v5.1.2

rem If argc is not equal to 1 or 2, print usage information and exit
if %argc% NEQ 1 (
    if %argc% NEQ 2 (
        echo.
        echo Usage: download-vad-model.cmd model [models_path]
        CALL :list_models
        goto :eof
    )
)

rem The optional second argument overrides the default download path.
rem %~1 / %~2 drop surrounding quotes so quoted arguments compose correctly
rem with the quoting added where the values are used.
set "models_path=%default_download_path%"
if %argc% EQU 2 set "models_path=%~2"
set "model=%~1"

rem Download only when the requested name matches a known model exactly
for %%b in (%models%) do (
    if "%%b"=="%model%" (
        CALL :download_model
        goto :eof
    )
)

echo Invalid model: %model%
CALL :list_models
goto :eof
:download_model
rem Subroutine: download ggml-%model%.bin into %models_path%.
rem Reads %model%, %models_path% and %root_path%, which the caller sets.
rem Returns errorlevel 1 when the transfer fails, 0 after a successful download.
echo Downloading vad model %model%...
if exist "%models_path%\\ggml-%model%.bin" (
    echo Model %model% already exists. Skipping download.
    goto :eof
)
PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Start-BitsTransfer -Source https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-%model%.bin -Destination \"%models_path%\\ggml-%model%.bin\""
if %ERRORLEVEL% neq 0 (
    echo Failed to download ggml model %model%
    echo Please try again later or download the original Whisper model files and convert them yourself.
    rem Report the failure to whoever invoked the script
    exit /b 1
)
rem Check if 'whisper-cli' is available in the system PATH
where whisper-cli >nul 2>&1
if %ERRORLEVEL%==0 (
    rem If found, suggest 'whisper-cli' (relying on PATH resolution)
    set "whisper_cmd=whisper-cli"
) else (
    rem If not found, suggest the local build version
    set "whisper_cmd=%root_path%\build\bin\Release\whisper-cli.exe"
)
echo Done! Model %model% saved in %models_path%\ggml-%model%.bin
echo You can now use it like this:
echo %whisper_cmd% -vm %models_path%\ggml-%model%.bin --vad -m models/ggml-base.en.bin -f samples\jfk.wav
rem Report success explicitly, independent of the 'where' probe result above
exit /b 0
:list_models
rem Subroutine: print a blank line, the "Available models:" heading, each
rem entry of %models% on its own line, and a closing blank line.
echo.
echo Available models:
for %%m in (%models%) do echo %%m
echo.
goto :eof

105
models/download-vad-model.sh Executable file
View File

@ -0,0 +1,105 @@
#!/bin/sh

# This script downloads Whisper VAD model files that have already been converted
# to ggml format. This way you don't have to convert them yourself.
#
# Usage: download-vad-model.sh <model> [models_path]

# Base URL of the repository that hosts the converted models.
src="https://huggingface.co/ggml-org/whisper-vad"
# Path prefix inside that repository; a model is fetched from
# "$src/$pfx-<model>.bin".
pfx="resolve/main/ggml"
# Print the directory that contains this script.
#
# When an executable `realpath` is found, the directory name of the resolved
# "$0" is printed. Otherwise the directory is determined by changing into
# dirname "$0" inside a command substitution and printing the physical
# working directory.
get_script_path() {
    if ! [ -x "$(command -v realpath)" ]; then
        # Fallback without realpath: the cd happens in a subshell, so the
        # caller's working directory is left untouched.
        _ret="$(
            cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit
            pwd -P
        )"
        echo "$_ret"
        return
    fi

    dirname "$(realpath "$0")"
}
# Directory that contains this script
script_path="$(get_script_path)"

# Choose the default download directory: when the script directory ends in
# "/bin", download into the current working directory; otherwise download
# next to the script.
if [ "${script_path%/bin}" != "$script_path" ]; then
    default_download_path="$PWD"
else
    default_download_path="$script_path"
fi

# A non-empty second argument overrides the default download directory
models_path="$default_download_path"
if [ -n "${2:-}" ]; then
    models_path="$2"
fi

# Whisper VAD models (whitespace-separated list of accepted names)
models='silero-v5.1.2'
# list available models
#
# Prints a blank line, the heading "Available models:", then every name from
# the whitespace-separated global $models. Names that share the same prefix
# (the text before the first '.' or '-') are printed on the same line; a new
# prefix starts a new line. Output ends with an empty line.
#
# The function body is a subshell ( ... ) rather than a brace group so that
# the loop variables used here stay private. In particular the loop variable
# "model" must not overwrite a variable of the same name set by the caller.
list_models() (
    printf "\n"
    printf "Available models:"
    model_class=""
    for model in $models; do
        # Strip everything from the first '.' or '-' to get the model family
        this_model_class="${model%%[.-]*}"
        if [ "$this_model_class" != "$model_class" ]; then
            printf "\n "
            model_class=$this_model_class
        fi
        printf " %s" "$model"
    done
    printf "\n\n"
)
# Validate the command line: the model name is required, the directory to
# store it in is optional.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    printf "Usage: %s <model> [models_path]\n" "$0"
    list_models
    exit 1
fi

model=$1

# Accept only names that appear verbatim in $models. A plain string comparison
# is used rather than grep so that fragments of a name (for example "silero"
# or "v5") and regular-expression metacharacters are not accepted as a model.
model_is_known=0
for known_model in $models; do
    if [ "$known_model" = "$model" ]; then
        model_is_known=1
        break
    fi
done

if [ "$model_is_known" -ne 1 ]; then
    printf "Invalid model: %s\n" "$model"
    list_models
    exit 1
fi

# download ggml model
printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"

cd "$models_path" || exit

model_file="ggml-$model.bin"
if [ -f "$model_file" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

# Download to a temporary name and rename only after the transfer succeeded.
# Writing straight to $model_file would leave an empty or truncated file behind
# on failure, which the existence check above would then treat as a finished
# download on the next run.
partial_file="$model_file.part"
model_url="$src/$pfx-$model.bin"
download_status=0

if [ -x "$(command -v wget2)" ]; then
    wget2 --no-config --progress bar -O "$partial_file" "$model_url" || download_status=$?
elif [ -x "$(command -v wget)" ]; then
    wget --no-config --quiet --show-progress -O "$partial_file" "$model_url" || download_status=$?
elif [ -x "$(command -v curl)" ]; then
    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as if it were the model.
    curl -L --fail --output "$partial_file" "$model_url" || download_status=$?
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi

if [ "$download_status" -ne 0 ] || ! mv -f "$partial_file" "$model_file"; then
    rm -f "$partial_file"
    printf "Failed to download ggml model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

# Check if 'whisper-cli' is available in the system PATH
if command -v whisper-cli >/dev/null 2>&1; then
    # If found, use 'whisper-cli' (relying on PATH resolution)
    whisper_cmd="whisper-cli"
else
    # If not found, use the local build version
    whisper_cmd="./build/bin/whisper-cli"
fi

printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n"
printf "  $ %s -vm %s/ggml-%s.bin --vad -f samples/jfk.wav -m models/ggml-base.en.bin\n" "$whisper_cmd" "$models_path" "$model"
printf "\n"