models : make all scripts to be POSIX Compliant (#1725)

* download-coreml-model: make it POSIX-compliant

* download-ggml-model: posix compliant (2nd)

* minor edit

* forgot to add newline

* generate-coreml-interface: far more straightforward

* generate-coreml-model: done with the posix thingy

* typo

* Update download-ggml-model.sh

* fix

* fix typo

* another fix

* Update download-coreml-model.sh

* Update download-ggml-model.sh

* Update download-coreml-model.sh
This commit is contained in:
Sơn Phan Trung 2024-01-12 19:11:04 +07:00 committed by GitHub
parent 6dcee35129
commit d05b7ee90e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 80 deletions

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
# This script downloads Whisper model files that have already been converted to Core ML format. # This script downloads Whisper model files that have already been converted to Core ML format.
# This way you don't have to convert them yourself. # This way you don't have to convert them yourself.
@@ -7,32 +7,32 @@ src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
pfx="resolve/main/ggml" pfx="resolve/main/ggml"
# get the path of this script # get the path of this script
function get_script_path() { get_script_path() {
if [ -x "$(command -v realpath)" ]; then if [ -x "$(command -v realpath)" ]; then
echo "$(dirname $(realpath $0))" dirname "$(realpath "$0")"
else else
local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
echo "$ret" echo "$_ret"
fi fi
} }
models_path="$(get_script_path)" models_path="$(get_script_path)"
# Whisper models # Whisper models
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3"
# list available models # list available models
function list_models { list_models() {
printf "\n" printf "\n"
printf " Available models:" printf " Available models:"
for model in "${models[@]}"; do for model in $models; do
printf "	$model" printf " %s" "$model"
done done
printf "\n\n" printf "\n\n"
} }
if [ "$#" -ne 1 ]; then if [ "$#" -ne 1 ]; then
printf "Usage: $0 <model>\n" printf "Usage: %s <model>\n" "$0"
list_models list_models
exit 1 exit 1
@@ -40,8 +40,8 @@ fi
model=$1 model=$1
if [[ ! " ${models[@]} " =~ " ${model} " ]]; then if ! echo "$models" | grep -q -w "$model"; then
printf "Invalid model: $model\n" printf "Invalid model: %s\n" "$model"
list_models list_models
exit 1 exit 1
@@ -49,19 +49,19 @@ fi
# download Core ML model # download Core ML model
printf "Downloading Core ML model $model from '$src' ...\n" printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"
cd $models_path cd "$models_path" || exit
if [ -f "ggml-$model.mlmodel" ]; then if [ -f "ggml-$model.mlmodel" ]; then
printf "Model $model already exists. Skipping download.\n" printf "Model %s already exists. Skipping download.\n" "$model"
exit 0 exit 0
fi fi
if [ -x "$(command -v wget)" ]; then if [ -x "$(command -v wget)" ]; then
wget --quiet --show-progress -O ggml-$model.mlmodel $src/$pfx-$model.mlmodel wget --quiet --show-progress -O ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
elif [ -x "$(command -v curl)" ]; then elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-$model.mlmodel $src/$pfx-$model.mlmodel curl -L --output ggml-"$model".mlmodel $src/$pfx-"$model".mlmodel
else else
printf "Either wget or curl is required to download models.\n" printf "Either wget or curl is required to download models.\n"
exit 1 exit 1
@@ -69,14 +69,14 @@ fi
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
printf "Failed to download Core ML model $model \n" printf "Failed to download Core ML model %s \n" "$model"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n" printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1 exit 1
fi fi
printf "Done! Model '$model' saved in 'models/ggml-$model.mlmodel'\n" printf "Done! Model '%s' saved in 'models/ggml-%s.mlmodel'\n" "$model" "$model"
printf "Run the following command to compile it:\n\n" printf "Run the following command to compile it:\n\n"
printf " $ xcrun coremlc compile ./models/ggml-$model.mlmodel ./models\n\n" printf " $ xcrun coremlc compile ./models/ggml-%s.mlmodel ./models\n\n" "$model"
printf "You can now use it like this:\n\n" printf "You can now use it like this:\n\n"
printf " $ ./main -m models/ggml-$model.bin -f samples/jfk.wav\n" printf " $ ./main -m models/ggml-%s.bin -f samples/jfk.wav\n" "$model"
printf "\n" printf "\n"

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
# This script downloads Whisper model files that have already been converted to ggml format. # This script downloads Whisper model files that have already been converted to ggml format.
# This way you don't have to convert them yourself. # This way you don't have to convert them yourself.
@@ -10,54 +10,52 @@ src="https://huggingface.co/ggerganov/whisper.cpp"
pfx="resolve/main/ggml" pfx="resolve/main/ggml"
# get the path of this script # get the path of this script
function get_script_path() { get_script_path() {
if [ -x "$(command -v realpath)" ]; then if [ -x "$(command -v realpath)" ]; then
echo "$(dirname "$(realpath "$0")")" dirname "$(realpath "$0")"
else else
local ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P)" _ret="$(cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit ; pwd -P)"
echo "$ret" echo "$_ret"
fi fi
} }
models_path="${2:-$(get_script_path)}" models_path="${2:-$(get_script_path)}"
# Whisper models # Whisper models
models=( models="tiny.en
"tiny.en" tiny
"tiny" tiny-q5_1
"tiny-q5_1" tiny.en-q5_1
"tiny.en-q5_1" base.en
"base.en" base
"base" base-q5_1
"base-q5_1" base.en-q5_1
"base.en-q5_1" small.en
"small.en" small.en-tdrz
"small.en-tdrz" small
"small" small-q5_1
"small-q5_1" small.en-q5_1
"small.en-q5_1" medium
"medium" medium.en
"medium.en" medium-q5_0
"medium-q5_0" medium.en-q5_0
"medium.en-q5_0" large-v1
"large-v1" large-v2
"large-v2" large-v3
"large-v3" large-v3-q5_0"
"large-v3-q5_0"
)
# list available models # list available models
function list_models { list_models() {
printf "\n" printf "\n"
printf " Available models:" printf " Available models:"
for model in "${models[@]}"; do for model in $models; do
printf " $model" printf " %s" "$model"
done done
printf "\n\n" printf "\n\n"
} }
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: $0 <model> [models_path]\n" printf "Usage: %s <model> [models_path]\n" "$0"
list_models list_models
exit 1 exit 1
@@ -65,34 +65,36 @@ fi
model=$1 model=$1
if [[ ! " ${models[@]} " =~ " ${model} " ]]; then if ! echo "$models" | grep -q -w "$model"; then
printf "Invalid model: $model\n" printf "Invalid model: %s\n" "$model"
list_models list_models
exit 1 exit 1
fi fi
# check if model contains `tdrz` and update the src and pfx accordingly # check if model contains `tdrz` and update the src and pfx accordingly
if [[ $model == *"tdrz"* ]]; then if echo "$model" | grep -q "tdrz"; then
src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp" src="https://huggingface.co/akashmjn/tinydiarize-whisper.cpp"
pfx="resolve/main/ggml" pfx="resolve/main/ggml"
fi fi
echo "$model" | grep -q '^"tdrz"*$'
# download ggml model # download ggml model
printf "Downloading ggml model $model from '$src' ...\n" printf "Downloading ggml model %s from '%s' ...\n" "$model" "$src"
cd "$models_path" cd "$models_path" || exit
if [ -f "ggml-$model.bin" ]; then if [ -f "ggml-$model.bin" ]; then
printf "Model $model already exists. Skipping download.\n" printf "Model %s already exists. Skipping download.\n" "$model"
exit 0 exit 0
fi fi
if [ -x "$(command -v wget)" ]; then if [ -x "$(command -v wget)" ]; then
wget --no-config --quiet --show-progress -O ggml-$model.bin $src/$pfx-$model.bin wget --no-config --quiet --show-progress -O ggml-"$model".bin $src/$pfx-"$model".bin
elif [ -x "$(command -v curl)" ]; then elif [ -x "$(command -v curl)" ]; then
curl -L --output ggml-$model.bin $src/$pfx-$model.bin curl -L --output ggml-"$model".bin $src/$pfx-"$model".bin
else else
printf "Either wget or curl is required to download models.\n" printf "Either wget or curl is required to download models.\n"
exit 1 exit 1
@@ -100,12 +100,13 @@ fi
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
printf "Failed to download ggml model $model \n" printf "Failed to download ggml model %s \n" "$model"
printf "Please try again later or download the original Whisper model files and convert them yourself.\n" printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
exit 1 exit 1
fi fi
printf "Done! Model '$model' saved in '$models_path/ggml-$model.bin'\n"
printf "Done! Model '%s' saved in '%s/ggml-%s.bin'\n" "$model" "$models_path" "$model"
printf "You can now use it like this:\n\n" printf "You can now use it like this:\n\n"
printf " $ ./main -m $models_path/ggml-$model.bin -f samples/jfk.wav\n" printf " $ ./main -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$models_path" "$model"
printf "\n" printf "\n"

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
# #
# This generates: # This generates:
# - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m # - coreml/whisper-encoder-impl.h and coreml/whisper-encoder-impl.m
@@ -6,7 +6,7 @@
# #
wd=$(dirname "$0") wd=$(dirname "$0")
cd "$wd/../" cd "$wd/../" || exit
python3 models/convert-whisper-to-coreml.py --model tiny.en python3 models/convert-whisper-to-coreml.py --model tiny.en

View File

@@ -1,4 +1,4 @@
#!/bin/bash #!/bin/sh
# Usage: ./generate-coreml-model.sh <model-name> # Usage: ./generate-coreml-model.sh <model-name>
if [ $# -eq 0 ]; then if [ $# -eq 0 ]; then
@@ -6,7 +6,7 @@ if [ $# -eq 0 ]; then
echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>" echo "Usage for Whisper models: ./generate-coreml-model.sh <model-name>"
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>" echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
exit 1 exit 1
elif [[ "$1" == "-h5" && $# != 3 ]]; then elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
echo "No model name and model path supplied for a HuggingFace model" echo "No model name and model path supplied for a HuggingFace model"
echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>" echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 <model-name> <model-path>"
exit 1 exit 1
@@ -15,20 +15,20 @@ fi
mname="$1" mname="$1"
wd=$(dirname "$0") wd=$(dirname "$0")
cd "$wd/../" cd "$wd/../" || exit
if [[ $mname == "-h5" ]]; then if [ "$mname" = "-h5" ]; then
mname="$2" mname="$2"
mpath="$3" mpath="$3"
echo $mpath echo "$mpath"
python3 models/convert-h5-to-coreml.py --model-name $mname --model-path $mpath --encoder-only True python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
else else
python3 models/convert-whisper-to-coreml.py --model $mname --encoder-only True --optimize-ane True python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
fi fi
xcrun coremlc compile models/coreml-encoder-${mname}.mlpackage models/ xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
rm -rf models/ggml-${mname}-encoder.mlmodelc rm -rf models/ggml-"${mname}"-encoder.mlmodelc
mv -v models/coreml-encoder-${mname}.mlmodelc models/ggml-${mname}-encoder.mlmodelc mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc
# TODO: decoder (sometime in the future maybe) # TODO: decoder (sometime in the future maybe)
#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/ #xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/