Adding sanitizer tests

This commit is contained in:
Georgi Gerganov 2022-10-08 10:56:59 +03:00
parent 29b041f79b
commit 2f069335ab
5 changed files with 81 additions and 10 deletions

View File

@ -61,7 +61,7 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure
ubuntu-latest-clang:
runs-on: ubuntu-latest
@ -87,7 +87,7 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure
ubuntu-latest-gcc-sanitized:
runs-on: ubuntu-latest
@ -112,4 +112,4 @@ jobs:
- name: Build
run: |
make
ctest --output-on-failure
ctest -L gh --output-on-failure

1
.gitignore vendored
View File

@ -4,3 +4,4 @@ stream
*.o
.cache
build/
compile_commands.json

View File

@ -0,0 +1,62 @@
# End-to-end smoke tests: run the `main` executable on the bundled JFK sample
# with each of the "for-tests" model files.
#
# whisper_add_main_test(<model> <labels>)
#   <model>  - model flavour; selects models/for-tests-ggml-<model>.bin and
#              names the test test-main-<model>
#   <labels> - semicolon-separated CTest labels attached to the test
function(whisper_add_main_test model labels)
    set(TEST_TARGET test-main-${model})

    add_test(NAME ${TEST_TARGET}
        COMMAND $<TARGET_FILE:main>
        -m ${PROJECT_SOURCE_DIR}/models/for-tests-ggml-${model}.bin
        -f ${PROJECT_SOURCE_DIR}/samples/jfk.wav)

    # Quote the list so the semicolons reach LABELS as a single value.
    set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "${labels}")
endfunction()

# Only the tiny models carry the "gh" label, so a run restricted with
# `ctest -L gh` executes just those two tests.
whisper_add_main_test(tiny      "tiny;gh")
whisper_add_main_test(tiny.en   "tiny;en;gh")

whisper_add_main_test(base      "base")
whisper_add_main_test(base.en   "base;en")

whisper_add_main_test(small     "small")
whisper_add_main_test(small.en  "small;en")

whisper_add_main_test(medium    "medium")
whisper_add_main_test(medium.en "medium;en")

whisper_add_main_test(large     "large")

View File

@ -950,6 +950,7 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
// load weights
{
int n_loaded = 0;
size_t total_size = 0;
while (true) {
@ -1004,9 +1005,17 @@ bool whisper_model_load(const std::string & fname, whisper_context & wctx) {
//printf("%24s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
total_size += ggml_nbytes(tensor);
n_loaded++;
}
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
if (n_loaded == 0) {
printf("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
} else if (n_loaded != model.tensors.size()) {
fprintf(stderr, "%s: ERROR not all tensors loaded from model file - expected %zu, got %d\n", __func__, model.tensors.size(), n_loaded);
return false;
}
}
fin.close();
@ -1772,8 +1781,6 @@ bool whisper_decode(
}
// the most basic sampling scheme - select the top token
// TODO: beam search
// TODO: temperature
whisper_vocab::id whisper_sample_best(
const whisper_vocab & vocab,
const float * probs, bool need_timestamp) {

View File

@ -71,11 +71,12 @@ extern "C" {
// return the id of the specified language, returns -1 if not found
WHISPER_API int whisper_lang_id(const char * lang);
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
WHISPER_API float * whisper_get_probs (struct whisper_context * ctx);
WHISPER_API int whisper_n_len (struct whisper_context * ctx); // mel length
WHISPER_API int whisper_n_vocab (struct whisper_context * ctx);
WHISPER_API int whisper_n_text_ctx (struct whisper_context * ctx);
WHISPER_API int whisper_is_multilingual(struct whisper_context * ctx);
WHISPER_API float * whisper_get_probs(struct whisper_context * ctx);
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);