whisper : add integer quantization support (#540)

* whisper : add integer quantization support

* examples : add common-ggml + prepare to add "quantize" tool

* whisper : quantization tool ready

* whisper : fix F32 support

* whisper : try to fix shared lib linkage

* wasm : update quantized models to Q5

* bench.wasm : remove "medium" button

* bench.wasm : fix custom model button

* ggml : add Q5_0 and Q5_1 WASM SIMD

* wasm : add quantized models to all WASM examples

* wasm : bump DB version number to 2

* talk-llama : update example to latest llama.cpp

* node : increase test timeout to 10s

* readme : add information for model quantization

* wasm : add links to other examples
This commit is contained in:
Georgi Gerganov
2023-04-30 18:51:57 +03:00
committed by GitHub
parent 5fd1bdd7fc
commit 794b162a46
41 changed files with 3183 additions and 1010 deletions

View File

@ -10,6 +10,12 @@ std::thread g_worker;
std::vector<struct whisper_context *> g_contexts(4, nullptr);
// Returns the largest power of two that is less than or equal to n.
// Returns 0 when n < 1, since no power of two fits; callers that need a
// usable count (e.g. a thread count) must handle that case themselves.
static inline int mpow2(int n) {
    if (n < 1) {
        return 0;
    }

    int p = 1;
    // Compare against n / 2 instead of doubling past n: p <= n / 2 guarantees
    // 2 * p <= n, so the multiplication can never overflow a signed int, even
    // for n close to INT_MAX.
    while (p <= n / 2) {
        p *= 2;
    }
    return p;
}
EMSCRIPTEN_BINDINGS(whisper) {
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
if (g_worker.joinable()) {
@ -43,7 +49,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
}
}));
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, int nthreads, bool translate) {
if (g_worker.joinable()) {
g_worker.join();
}
@ -66,7 +72,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
params.print_special = false;
params.translate = translate;
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency())));
params.offset_ms = 0;
std::vector<float> pcmf32;