From 0244810697c7f1d0bd210f8ce386e7cc8fed0362 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 26 Mar 2023 16:09:06 +0300 Subject: [PATCH] rebase on master after whisper_state changes --- whisper.cpp | 73 +++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 11300c3b..401d9454 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -589,7 +589,6 @@ struct whisper_state { int lang_id = 0; // english by default - std::string path_model; // populated by whisper_init_from_file() #ifdef WHISPER_USE_COREML whisper_coreml_context * ctx_coreml; #endif @@ -644,6 +643,8 @@ struct whisper_context { whisper_model model; whisper_vocab vocab; whisper_state * state = nullptr; + + std::string path_model; // populated by whisper_init_from_file() }; template @@ -1606,7 +1607,7 @@ static bool whisper_encode_internal( ggml_repeat(ctx0, layer.mlp_ln_w, cur), cur), ggml_repeat(ctx0, layer.mlp_ln_b, cur)); - } + } #ifdef WHISPER_USE_FLASH_FF wstate.use_buf(ctx0, 0); @@ -1646,7 +1647,7 @@ static bool whisper_encode_internal( ggml_repeat(ctx0, layer.mlp_1_b, cur), cur); #endif -} + } wstate.use_buf(ctx0, 3); @@ -1684,11 +1685,11 @@ static bool whisper_encode_internal( //ggml_graph_print(&gf); } #else - wctx.use_buf(ctx0, -1); + wstate.use_buf(ctx0, -1); struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); - whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data); + whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data); #endif // cur @@ -2493,12 +2494,25 @@ static std::vector tokenize(const whisper_vocab & vocab, cons // interface implementation // +#ifdef WHISPER_USE_COREML +// replace .bin with .mlmodelc +static std::string whisper_get_coreml_path(std::string path_bin) { + auto pos = path_bin.rfind('.'); + if (pos != std::string::npos) { + path_bin = path_bin.substr(0, pos); + } + + path_bin += ".mlmodelc"; + + return path_bin; +} +#endif + struct whisper_state * whisper_init_state(whisper_context * ctx) { whisper_state * state = new whisper_state; const size_t scale = ctx->model.hparams.f16 ? 1 : 2; - if (!kv_cache_init(ctx->model.hparams, scale * MEM_REQ_KV_SELF.at(ctx->model.type), state->decoders[0].kv_self, ctx->wtype, ctx->model.hparams.n_text_ctx)) { fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__); return nullptr; @@ -2519,6 +2533,20 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { fprintf(stderr, "%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0); } +#ifdef WHISPER_USE_COREML + const auto path_coreml = whisper_get_coreml_path(ctx->path_model); + + fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str()); + fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__); + + state->ctx_coreml = whisper_coreml_init(path_coreml.c_str()); + if (!state->ctx_coreml) { + fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str()); + return nullptr; + } + + fprintf(stderr, "%s: Core ML model loaded\n", __func__); +#endif state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx); @@ -2542,20 +2570,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) { return state; } -#ifdef WHISPER_USE_COREML -// replace .bin with .mlmodelc -static std::string whisper_get_coreml_path(std::string path_bin) { - auto pos = path_bin.rfind('.'); - if (pos != std::string::npos) { - path_bin = path_bin.substr(0, pos); - } - - path_bin += ".mlmodelc"; - - return path_bin; -} -#endif - struct whisper_context * whisper_init_from_file_no_state(const char * path_model) { whisper_model_loader loader = {}; @@ -2585,23 +2599,10 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model fin->close(); }; - auto ctx = whisper_init(&loader); + auto ctx = whisper_init_no_state(&loader); if (ctx) { ctx->path_model = path_model; -#ifdef WHISPER_USE_COREML - const auto path_coreml = whisper_get_coreml_path(ctx->path_model); - fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str()); - fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__); - - ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str()); - if (!ctx->ctx_coreml) { - fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str()); - return nullptr; - } - - fprintf(stderr, "%s: Core ML model loaded\n", __func__); -#endif } return ctx; @@ -2730,8 +2731,8 @@ void whisper_free(struct whisper_context * ctx) { whisper_free_state(ctx->state); #ifdef WHISPER_USE_COREML - whisper_coreml_free(ctx->ctx_coreml); - ctx->ctx_coreml = nullptr; + whisper_coreml_free(ctx->state->ctx_coreml); + ctx->state->ctx_coreml = nullptr; #endif delete ctx; }