From bfbaa4dce5cf1de2e1ec965f4cfbed11c39d3a77 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 15 Nov 2023 19:42:25 +0200 Subject: [PATCH] whisper : make large version explicit + fix data size units (#1493) --- Makefile | 4 +- README.md | 16 +++---- .../go/examples/go-model-download/main.go | 2 +- examples/livestream.sh | 2 +- examples/twitch.sh | 2 +- extra/convert-all.sh | 2 +- ggml-metal.m | 18 ++++---- models/README.md | 26 +++++------ models/convert-h5-to-coreml.py | 4 +- models/convert-whisper-to-coreml.py | 4 +- models/convert-whisper-to-openvino.py | 4 +- models/download-coreml-model.sh | 2 +- models/download-ggml-model.cmd | 2 +- models/download-ggml-model.sh | 4 +- tests/run-tests.sh | 2 +- whisper.cpp | 44 +++++++++---------- 16 files changed, 69 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 23dbb4f3..5358f063 100644 --- a/Makefile +++ b/Makefile @@ -418,9 +418,9 @@ samples: .PHONY: medium .PHONY: large-v1 .PHONY: large-v2 -.PHONY: large +.PHONY: large-v3 -tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main +tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main bash ./models/download-ggml-model.sh $@ @echo "" @echo "===============================================" diff --git a/README.md b/README.md index e2c247c4..ff97ab10 100644 --- a/README.md +++ b/README.md @@ -231,18 +231,18 @@ make medium.en make medium make large-v1 make large-v2 -make large +make large-v3 ``` ## Memory usage -| Model | Disk | Mem | SHA | -| --- | --- | --- | --- | -| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` | -| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | -| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | -| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | -| large | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` | +| Model | Disk | Mem | +| --- | --- | --- | +| tiny | 75 MiB | ~273 MB | +| base | 142 MiB | ~388 MB | +| small | 466 MiB | ~852 MB | +| medium | 1.5 GiB | ~2.1 GB | +| large | 2.9 GiB | ~3.9 GB | ## Quantization diff --git a/bindings/go/examples/go-model-download/main.go b/bindings/go/examples/go-model-download/main.go index d3e45c28..3522d881 100644 --- a/bindings/go/examples/go-model-download/main.go +++ b/bindings/go/examples/go-model-download/main.go @@ -24,7 +24,7 @@ const ( var ( // The models which will be downloaded, if no model is specified as an argument - modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"} + modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"} ) var ( diff --git a/examples/livestream.sh b/examples/livestream.sh index d86a7c60..fbeb6dbc 100755 --- a/examples/livestream.sh +++ b/examples/livestream.sh @@ -48,7 +48,7 @@ if [ -n "$3" ]; then fi # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) # list available models function list_models { diff --git a/examples/twitch.sh b/examples/twitch.sh index 77b618dd..0403fea9 100755 --- a/examples/twitch.sh +++ b/examples/twitch.sh @@ 
-21,7 +21,7 @@ help() echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]" echo "options:" echo "-s Step in seconds (default is $step)." - echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')." + echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')." echo "-t Number of threads to use." echo "-h Print this help page." echo diff --git a/extra/convert-all.sh b/extra/convert-all.sh index c9638079..ff765c92 100755 --- a/extra/convert-all.sh +++ b/extra/convert-all.sh @@ -1,6 +1,6 @@ #!/bin/bash -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) for model in "${models[@]}"; do python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/ diff --git a/ggml-metal.m b/ggml-metal.m index 3d22b0b2..4fe9cc48 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -346,9 +346,9 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { } GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); - GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); + GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6); if (ctx->device.maxTransferRate != 0) { - GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); + GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6); } else { GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__); } @@ -541,11 +541,11 @@ bool ggml_metal_add_buffer( ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil]; if (ctx->buffers[ctx->n_buffers].metal == nil) { - GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0); + GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1e6); return false; } - GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0); + GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1e6); ++ctx->n_buffers; } else { @@ -565,11 +565,11 @@ bool ggml_metal_add_buffer( ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil]; if (ctx->buffers[ctx->n_buffers].metal == nil) { - GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0); + GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1e6); return false; } - GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i); + GGML_METAL_LOG_INFO("%s: allocated '%-16s' 
buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1e6, i); if (i + size_step < size) { GGML_METAL_LOG_INFO("\n"); } @@ -580,8 +580,8 @@ bool ggml_metal_add_buffer( #if TARGET_OS_OSX GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)", - ctx->device.currentAllocatedSize / 1024.0 / 1024.0, - ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); + ctx->device.currentAllocatedSize / 1e6, + ctx->device.recommendedMaxWorkingSetSize / 1e6); if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) { GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__); @@ -589,7 +589,7 @@ bool ggml_metal_add_buffer( GGML_METAL_LOG_INFO("\n"); } #else - GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0); + GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1e6); #endif } diff --git a/models/README.md b/models/README.md index b12f2d22..de4a13fe 100644 --- a/models/README.md +++ b/models/README.md @@ -39,19 +39,19 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main ## Available models -| Model | Disk | Mem | SHA | -| --- | --- | --- | --- | -| tiny | 75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` | -| tiny.en | 75 MB | ~390 MB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` | -| base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | -| base.en | 142 MB | ~500 MB | `137c40403d78fd54d454da0f9bd998f78703390c` | -| small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | -| small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` | -| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | -| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | -| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | -| large-v2 | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | -| large | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` | +| Model | Disk | SHA | +| --- | --- | --- | +| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` | +| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` | +| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` | +| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` | +| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` | +| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` | +| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` | +| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` | +| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` | +| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` | +| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` | ## Model files for testing purposes diff --git a/models/convert-h5-to-coreml.py b/models/convert-h5-to-coreml.py index 3887c22a..57341ab0 100644 --- a/models/convert-h5-to-coreml.py +++ b/models/convert-h5-to-coreml.py @@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str): # Ported from models/convert-whisper-to-coreml.py if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) + parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. 
tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) args = parser.parse_args() - if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]: + if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: raise ValueError("Invalid model name") pt_target_path = f"models/hf-{args.model_name}.pt" diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py index 7e09f5ba..fd7191ab 100644 --- a/models/convert-whisper-to-coreml.py +++ b/models/convert-whisper-to-coreml.py @@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) + parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False) parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False) parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False) args = parser.parse_args() - if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]: + if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: raise ValueError("Invalid model name") whisper = load_model(args.model).cpu() diff --git a/models/convert-whisper-to-openvino.py b/models/convert-whisper-to-openvino.py index 88e03ff7..1a4ad304 100644 --- a/models/convert-whisper-to-openvino.py +++ b/models/convert-whisper-to-openvino.py @@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True) + parser.add_argument("--model", type=str, help="model to convert (e.g. 
tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True) args = parser.parse_args() - if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]: + if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]: raise ValueError("Invalid model name") whisper = load_model(args.model).cpu() diff --git a/models/download-coreml-model.sh b/models/download-coreml-model.sh index 95739dbf..9e67a150 100755 --- a/models/download-coreml-model.sh +++ b/models/download-coreml-model.sh @@ -19,7 +19,7 @@ function get_script_path() { models_path="$(get_script_path)" # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) # list available models function list_models { diff --git a/models/download-ggml-model.cmd b/models/download-ggml-model.cmd index fc279967..4d21531d 100644 --- a/models/download-ggml-model.cmd +++ b/models/download-ggml-model.cmd @@ -8,7 +8,7 @@ popd set argc=0 for %%x in (%*) do set /A argc+=1 -set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large +set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 if %argc% neq 1 ( echo. diff --git a/models/download-ggml-model.sh b/models/download-ggml-model.sh index ea68da89..9c0119c3 100755 --- a/models/download-ggml-model.sh +++ b/models/download-ggml-model.sh @@ -22,7 +22,7 @@ function get_script_path() { models_path="$(get_script_path)" # Whisper models -models=( +models=( "tiny.en" "tiny" "tiny-q5_1" @@ -42,7 +42,7 @@ models=( "medium.en-q5_0" "large-v1" "large-v2" - "large" + "large-v3" "large-q5_0" ) diff --git a/tests/run-tests.sh b/tests/run-tests.sh index bf062dd6..a9606a1f 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -19,7 +19,7 @@ cd `dirname $0` # Whisper models -models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" ) +models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" ) # list available models function list_models { diff --git a/whisper.cpp b/whisper.cpp index a3e0fbd0..acedee0d 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -1522,7 +1522,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con model.buffer = ggml_backend_alloc_buffer(wctx.backend, size_main); - WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1e6); } ggml_allocr * alloc = ggml_allocr_new_from_buffer(model.buffer); @@ -1637,12 +1637,12 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor)); } - //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/1024.0/1024.0); + //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/1e6); total_size += ggml_nbytes(tensor); 
model.n_loaded++; } - WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/1024.0/1024.0); + WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/1e6); if (model.n_loaded == 0) { WHISPER_LOG_WARN("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__); @@ -2027,11 +2027,11 @@ static struct ggml_cgraph * whisper_build_graph_encoder( //////////////////////////////////////////////////////////////////////////// //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__, - // ggml_used_mem(ctx0)/1024.0/1024.0, - // wstate.get_buf_max_mem(0)/1024.0/1024.0, - // wstate.get_buf_max_mem(1)/1024.0/1024.0, - // wstate.get_buf_max_mem(2)/1024.0/1024.0, - // wstate.get_buf_max_mem(3)/1024.0/1024.0); + // ggml_used_mem(ctx0)/1e6, + // wstate.get_buf_max_mem(0)/1e6, + // wstate.get_buf_max_mem(1)/1e6, + // wstate.get_buf_max_mem(2)/1e6, + // wstate.get_buf_max_mem(3)/1e6); ggml_free(ctx0); @@ -2613,11 +2613,11 @@ static bool whisper_decode_internal( if (batch.n_tokens > 1) { //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__, - // ggml_used_mem(ctx0)/1024.0/1024.0, - // wstate.get_buf_max_mem(0)/1024.0/1024.0, - // wstate.get_buf_max_mem(1)/1024.0/1024.0, - // wstate.get_buf_max_mem(2)/1024.0/1024.0, - // wstate.get_buf_max_mem(3)/1024.0/1024.0); + // ggml_used_mem(ctx0)/1e6, + // wstate.get_buf_max_mem(0)/1e6, + // wstate.get_buf_max_mem(1)/1e6, + // wstate.get_buf_max_mem(2)/1e6, + // wstate.get_buf_max_mem(3)/1e6); } if (batch.n_tokens == 1) { @@ -3057,7 +3057,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { { const size_t memory_size = ggml_nbytes(state->kv_self.k) + ggml_nbytes(state->kv_self.v); - WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size / 1e6); } if (!kv_cache_init(ctx->model.hparams, state->kv_cross, ctx->backend, ctx->itype, ctx->model.hparams.n_audio_ctx)) { @@ -3068,7 +3068,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { { const size_t memory_size = ggml_nbytes(state->kv_cross.k) + ggml_nbytes(state->kv_cross.v); - WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1e6); } #ifdef WHISPER_USE_COREML @@ -3110,7 +3110,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { return whisper_build_graph_conv(*ctx, *state, 0); }); - WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) / 1e6); } // encoder allocator @@ -3120,7 +3120,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { return whisper_build_graph_encoder(*ctx, *state); }); - WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) / 1e6); } // cross allocator @@ -3130,7 +3130,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { return whisper_build_graph_cross(*ctx, *state); }); - WHISPER_LOG_INFO("%s: compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) / 1024.0 / 1024.0); + WHISPER_LOG_INFO("%s: 
compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) / 1e6);
     }
 
     // decoder allocator
@@ -3148,7 +3148,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
             return whisper_build_graph_decoder(*ctx, *state, state->batch);
         });
 
-        WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) / 1e6);
     }
 
     whisper_allocr_graph_realloc(state->alloc_conv, ctx->backend);
@@ -6072,8 +6072,8 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
     size_t n = 20;
     size_t arr = n_threads > 0 ? 1024llu : n_threads; // trick to avoid compiler optimizations
 
-    // 1GB MB array
-    const size_t size = arr*1024llu*1024llu;
+    // 1GB array
+    const size_t size = arr*1e6;
 
     // single-thread
     {
@@ -6099,7 +6099,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
         src[rand() % size] = rand() % 256;
     }
 
-    snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
+    snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
     s += strbuf;
 
     // needed to prevent the compiler from optimizing the memcpy away