mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-24 06:46:37 +00:00
Adding sanitizer tests
This commit is contained in:
parent
29b041f79b
commit
2f069335ab
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@ -61,7 +61,7 @@ jobs:
|
|||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
make
|
make
|
||||||
ctest --output-on-failure
|
ctest -L gh --output-on-failure
|
||||||
|
|
||||||
ubuntu-latest-clang:
|
ubuntu-latest-clang:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@ -87,7 +87,7 @@ jobs:
|
|||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
make
|
make
|
||||||
ctest --output-on-failure
|
ctest -L gh --output-on-failure
|
||||||
|
|
||||||
ubuntu-latest-gcc-sanitized:
|
ubuntu-latest-gcc-sanitized:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@ -112,4 +112,4 @@ jobs:
|
|||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
make
|
make
|
||||||
ctest --output-on-failure
|
ctest -L gh --output-on-failure
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,3 +4,4 @@ stream
|
|||||||
*.o
|
*.o
|
||||||
.cache
|
.cache
|
||||||
build/
|
build/
|
||||||
|
compile_commands.json
|
||||||
|
@ -0,0 +1,62 @@
|
|||||||
|
set(TEST_TARGET test-main-tiny)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;gh")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-tiny.en)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-tiny.en.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;en;gh")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-base)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-base.en)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-base.en.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "base;en")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-small)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-small.en)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-small.en.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "small;en")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-medium)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-medium.en)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-medium.en.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "medium;en")
|
||||||
|
|
||||||
|
set(TEST_TARGET test-main-large)
|
||||||
|
add_test(NAME ${TEST_TARGET}
|
||||||
|
COMMAND $<TARGET_FILE:main>
|
||||||
|
-m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-large.bin
|
||||||
|
-f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)
|
||||||
|
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "large")
|
11
whisper.cpp
11
whisper.cpp
@ -950,6 +950,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|||||||
|
|
||||||
// load weights
|
// load weights
|
||||||
{
|
{
|
||||||
|
int n_loaded = 0;
|
||||||
size_t total_size = 0;
|
size_t total_size = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -1004,9 +1005,17 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
|
|||||||
|
|
||||||
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
|
||||||
total_size += ggml_nbytes(tensor);
|
total_size += ggml_nbytes(tensor);
|
||||||
|
n_loaded++;
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
|
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
|
||||||
|
|
||||||
|
if (n_loaded == 0) {
|
||||||
|
printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
|
||||||
|
} else if (n_loaded != model.tensors.size()) {
|
||||||
|
fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fin.close();
|
fin.close();
|
||||||
@ -1772,8 +1781,6 @@ bool whisper_decode(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// the most basic sampling scheme - select the top token
|
// the most basic sampling scheme - select the top token
|
||||||
// TODO: beam search
|
|
||||||
// TODO: temperature
|
|
||||||
whisper_vocab::id whisper_sample_best(
|
whisper_vocab::id whisper_sample_best(
|
||||||
const whisper_vocab & vocab,
|
const whisper_vocab & vocab,
|
||||||
const float * probs, bool need_timestamp) {
|
const float * probs, bool need_timestamp) {
|
||||||
|
11
whisper.h
11
whisper.h
@ -71,11 +71,12 @@ extern "C" {
|
|||||||
// return the id of the specified language, returns -1 if not found
|
// return the id of the specified language, returns -1 if not found
|
||||||
WHISPER_API int whisper_lang_id(const char * lang);
|
WHISPER_API int whisper_lang_id(const char * lang);
|
||||||
|
|
||||||
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
|
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
|
||||||
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
|
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
|
||||||
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
|
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
|
||||||
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
|
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
|
||||||
WHISPER_API float * whisper_get_probs (struct whisper_context * ctx);
|
|
||||||
|
WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);
|
||||||
|
|
||||||
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user