mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-18 18:56:35 +00:00
whisper : fix UB when reading buffer of length 0 bytes (#265)
This commit is contained in:
parent
f66ac6dc4f
commit
124c718c73
13
whisper.cpp
13
whisper.cpp
@ -549,13 +549,20 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|||||||
//}
|
//}
|
||||||
|
|
||||||
std::string word;
|
std::string word;
|
||||||
|
std::vector<char> tmp;
|
||||||
for (int i = 0; i < n_vocab; i++) {
|
for (int i = 0; i < n_vocab; i++) {
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
read_safe(fin, len);
|
read_safe(fin, len);
|
||||||
|
|
||||||
std::vector<char> tmp(len); // create a buffer
|
if (len > 0) {
|
||||||
fin.read( &tmp[0], tmp.size() ); // read to buffer
|
tmp.resize(len);
|
||||||
word.assign(&tmp[0], tmp.size());
|
fin.read(&tmp[0], tmp.size()); // read to buffer
|
||||||
|
word.assign(&tmp[0], tmp.size());
|
||||||
|
} else {
|
||||||
|
// seems like we have an empty-string token in multi-language models (i = 50256)
|
||||||
|
//fprintf(stderr, "%s: warning: empty-string token in vocab, i = %d\n", __func__, i);
|
||||||
|
word = "";
|
||||||
|
}
|
||||||
|
|
||||||
vocab.token_to_id[word] = i;
|
vocab.token_to_id[word] = i;
|
||||||
vocab.id_to_token[i] = word;
|
vocab.id_to_token[i] = word;
|
||||||
|
Loading…
Reference in New Issue
Block a user