rebase on master after whisper_state changes

2025-03-16 00:55:24 +00:00 · 2023-03-26 16:09:06 +03:00 · 2023-03-26 16:09:06 +03:00 · 0244810697
commit 0244810697
parent 6efb04fc72
1 changed files with 37 additions and 36 deletions
--- a/whisper.cpp
+++ b/whisper.cpp
@ -589,7 +589,6 @@ struct whisper_state {

    int lang_id = 0; // english by default

-    std::string path_model; // populated by whisper_init_from_file()
 #ifdef WHISPER_USE_COREML
    whisper_coreml_context * ctx_coreml;
 #endif
@ -644,6 +643,8 @@ struct whisper_context {
    whisper_model model;
    whisper_vocab vocab;
    whisper_state * state = nullptr;
+
+    std::string path_model; // populated by whisper_init_from_file()
 };

 template<typename T>
@ -1606,7 +1607,7 @@ static bool whisper_encode_internal(
                        ggml_repeat(ctx0, layer.mlp_ln_w, cur),
                        cur),
                    ggml_repeat(ctx0, layer.mlp_ln_b, cur));
-    }
+            }

 #ifdef WHISPER_USE_FLASH_FF
            wstate.use_buf(ctx0, 0);
@ -1646,7 +1647,7 @@ static bool whisper_encode_internal(
                ggml_repeat(ctx0, layer.mlp_1_b, cur),
                cur);
 #endif
-}
+        }

        wstate.use_buf(ctx0, 3);

@ -1684,11 +1685,11 @@ static bool whisper_encode_internal(
        //ggml_graph_print(&gf);
    }
 #else
-    wctx.use_buf(ctx0, -1);
+    wstate.use_buf(ctx0, -1);

    struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);

-    whisper_coreml_encode(wctx.ctx_coreml, (float *) mel->data, (float *) cur->data);
+    whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
 #endif

    // cur
@ -2493,12 +2494,25 @@ static std::vector<whisper_vocab::id> tokenize(const whisper_vocab & vocab, cons
 // interface implementation
 //

+#ifdef WHISPER_USE_COREML
+// replace .bin with .mlmodelc
+static std::string whisper_get_coreml_path(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += ".mlmodelc";
+
+    return path_bin;
+}
+#endif
+
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
    whisper_state * state = new whisper_state;

    const size_t scale = ctx->model.hparams.f16 ? 1 : 2;

-
    if (!kv_cache_init(ctx->model.hparams, scale * MEM_REQ_KV_SELF.at(ctx->model.type), state->decoders[0].kv_self, ctx->wtype, ctx->model.hparams.n_text_ctx)) {
        fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
        return nullptr;
@ -2519,6 +2533,20 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
        fprintf(stderr, "%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
    }

+#ifdef WHISPER_USE_COREML
+        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
+
+        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
+        fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+        state->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
+        if (!state->ctx_coreml) {
+            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
+            return nullptr;
+        }
+
+        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
+#endif

    state->logits.reserve(ctx->vocab.n_vocab * ctx->model.hparams.n_text_ctx);

@ -2542,20 +2570,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
    return state;
 }

-#ifdef WHISPER_USE_COREML
-// replace .bin with .mlmodelc
-static std::string whisper_get_coreml_path(std::string path_bin) {
-    auto pos = path_bin.rfind('.');
-    if (pos != std::string::npos) {
-        path_bin = path_bin.substr(0, pos);
-    }
-
-    path_bin += ".mlmodelc";
-
-    return path_bin;
-}
-#endif
-
 struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {
    whisper_model_loader loader = {};

@ -2585,23 +2599,10 @@ struct whisper_context * whisper_init_from_file_no_state(const char * path_model
        fin->close();
    };

-    auto ctx = whisper_init(&loader);
+    auto ctx = whisper_init_no_state(&loader);

    if (ctx) {
        ctx->path_model = path_model;
-#ifdef WHISPER_USE_COREML
-        const auto path_coreml = whisper_get_coreml_path(ctx->path_model);
-        fprintf(stderr, "%s: loading Core ML model from '%s'\n", __func__, path_coreml.c_str());
-        fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
-
-        ctx->ctx_coreml = whisper_coreml_init(path_coreml.c_str());
-        if (!ctx->ctx_coreml) {
-            fprintf(stderr, "%s: failed to load Core ML model from '%s'\n", __func__, path_coreml.c_str());
-            return nullptr;
-        }
-
-        fprintf(stderr, "%s: Core ML model loaded\n", __func__);
-#endif
    }

    return ctx;
@ -2730,8 +2731,8 @@ void whisper_free(struct whisper_context * ctx) {
        whisper_free_state(ctx->state);

 #ifdef WHISPER_USE_COREML
-        whisper_coreml_free(ctx->ctx_coreml);
-        ctx->ctx_coreml = nullptr;
+        whisper_coreml_free(ctx->state->ctx_coreml);
+        ctx->state->ctx_coreml = nullptr;
 #endif
        delete ctx;
    }