whisper : add support for large v3 (#1444)

* whisper : add support for large v3

* bench : fix build + fix go bindings

* bench : fix n_mels

* models : update readme
This commit is contained in:
Georgi Gerganov
2023-11-07 15:30:18 +02:00
committed by GitHub
parent 973111088b
commit 2cdfc4e025
20 changed files with 70 additions and 38 deletions

View File

@ -228,7 +228,7 @@ with np.load(dir_whisper / "whisper" / "assets" / "mel_filters.npz") as f:
# for backwards compatibility, also check for older hf_transformers format tokenizer files
# old format: dir_whisper/whisper/assets/[multilingual/gpt2]/vocab.json
# new format: dir_whisper/whisper/assets/[multilingual/gpt2].tiktoken
multilingual = hparams["n_vocab"] == 51865
multilingual = hparams["n_vocab"] >= 51865
tokenizer = dir_whisper / "whisper" / "assets" / (multilingual and "multilingual.tiktoken" or "gpt2.tiktoken")
tokenizer_type = "tiktoken"
if not tokenizer.is_file():