whisper : add OpenVINO support (#1037)

* openvino: use OpenVINO encoder inference * openvino: add python script for OpenVINO model generation * whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build * Apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * whisper: Fix compilation error * whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures * cmake: Add openvino-encoder as separate object target * whisper : minor style fixes * minor : indentation fixes --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-06-15 05:18:07 +00:00 · 2023-07-04 08:56:11 -04:00
parent 176d7e4e7b
commit 62b81276e0
8 changed files with 367 additions and 3 deletions
--- a/whisper.cpp
+++ b/whisper.cpp
@ -3,6 +3,10 @@
 #include "coreml/whisper-encoder.h"
 #endif

+#if WHISPER_USE_OPENVINO
+#include "openvino/whisper-openvino-encoder.h"
+#endif
+
 #include "ggml.h"

 #include <algorithm>
@ -660,6 +664,10 @@ struct whisper_state {
    whisper_coreml_context * ctx_coreml = nullptr;
 #endif

+#ifdef WHISPER_USE_OPENVINO
+    whisper_openvino_context * ctx_openvino = nullptr;
+#endif
+
    // [EXPERIMENTAL] token-level timestamps data
    int64_t t_beg = 0;
    int64_t t_last = 0;
@ -1478,7 +1486,13 @@ static bool whisper_encode_internal(
    const bool use_coreml = wstate.ctx_coreml != nullptr;
 #endif

-    if (!use_coreml) {
+#ifndef WHISPER_USE_OPENVINO
+    const bool use_openvino = false;
+#else
+    const bool use_openvino = wstate.ctx_openvino != nullptr;
+#endif
+
+    if (!use_coreml && !use_openvino) {
        // convolution + gelu
        {
            wstate.use_buf(ctx0, 1);
@ -1777,8 +1791,7 @@ static bool whisper_encode_internal(
        }
    }
 #ifdef WHISPER_USE_COREML
-    else
-    {
+    else if (use_coreml) {
        wstate.use_buf(ctx0, -1);

        cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
@ -1786,6 +1799,17 @@ static bool whisper_encode_internal(
        whisper_coreml_encode(wstate.ctx_coreml, (float *) mel->data, (float *) cur->data);
    }
 #endif
+#ifdef WHISPER_USE_OPENVINO
+    else if (use_openvino) {
+        wstate.use_buf(ctx0, -1);
+
+        cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
+
+        if (!whisper_openvino_encode(wstate.ctx_openvino, mel, cur)) {
+            return false;
+        }
+    }
+#endif

    // cur
    //{
@ -2628,6 +2652,31 @@ static std::string whisper_get_coreml_path_encoder(std::string path_bin) {
 }
 #endif

+#ifdef WHISPER_USE_OPENVINO
+// replace .bin with-encoder-openvino.xml
+static std::string whisper_get_openvino_path_encoder(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino.xml";
+
+    return path_bin;
+}
+
+static std::string whisper_get_openvino_path_cache(std::string path_bin) {
+    auto pos = path_bin.rfind('.');
+    if (pos != std::string::npos) {
+        path_bin = path_bin.substr(0, pos);
+    }
+
+    path_bin += "-encoder-openvino-cache";
+
+    return path_bin;
+}
+#endif
+
 struct whisper_state * whisper_init_state(whisper_context * ctx) {
    whisper_state * state = new whisper_state;

@ -2694,6 +2743,58 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
    return state;
 }

+int whisper_ctx_init_openvino_encoder(struct whisper_context* ctx,
+    const char* openvino_model_path,
+    const char* openvino_device,
+    const char* openvino_cache_dir)
+{
+#ifndef WHISPER_USE_OPENVINO
+    (void)(ctx);
+    (void)(openvino_model_path);
+    (void)(openvino_device);
+    (void)(openvino_cache_dir);
+    return 0;
+#else
+    if (!openvino_model_path && ctx->path_model.empty())
+    {
+        fprintf(stderr, "%s: openvino_model_path is nullptr, and ctx has no model_path set.\n", __func__);
+        return 0;
+    }
+
+    std::string path_openvino;
+    if (!openvino_model_path) {
+        //if openvino_model_path is not set, attempt to find it in the same directory as ggml-<model>.bin model
+        path_openvino = whisper_get_openvino_path_encoder(ctx->path_model);
+    }
+    else {
+        path_openvino = openvino_model_path;
+    }
+
+    std::string path_openvino_cache_dir;
+    if (!openvino_cache_dir) {
+        //if openvino_cache_dir is not set, set it as a dir residing next to ggml-<model>.bin
+        path_openvino_cache_dir = whisper_get_openvino_path_cache(ctx->path_model);
+    }
+    else {
+        path_openvino_cache_dir = openvino_cache_dir;
+    }
+
+    fprintf(stderr, "%s: loading OpenVINO model from '%s'\n", __func__, path_openvino.c_str());
+    fprintf(stderr, "%s: first run on a device may take a while ...\n", __func__);
+
+    ctx->state->ctx_openvino = whisper_openvino_init(path_openvino.c_str(), openvino_device, path_openvino_cache_dir.c_str());
+    if (!ctx->state->ctx_openvino) {
+        fprintf(stderr, "%s: failed to init OpenVINO encoder from '%s'\n", __func__, path_openvino.c_str());
+        return 0;
+    }
+    else {
+        fprintf(stderr, "%s: OpenVINO model loaded\n", __func__);
+    }
+
+    return 1;
+#endif
+}
+
 struct whisper_context * whisper_init_from_file_no_state(const char * path_model) {

    fprintf(stderr, "%s: loading model from '%s'\n", __func__, path_model);
@ -2848,6 +2949,13 @@ void whisper_free_state(struct whisper_state * state)
        }
 #endif

+#ifdef WHISPER_USE_OPENVINO
+        if (state->ctx_openvino != nullptr) {
+            whisper_openvino_free(state->ctx_openvino);
+            state->ctx_openvino = nullptr;
+        }
+#endif
+
        delete state;
    }
 }
@ -3287,6 +3395,14 @@ static int whisper_has_coreml(void) {
 #endif
 }

+static int whisper_has_openvino(void) {
+#ifdef WHISPER_USE_OPENVINO
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 const char * whisper_print_system_info(void) {
    static std::string s;

@ -3304,6 +3420,7 @@ const char * whisper_print_system_info(void) {
    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
    s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";
    s += "COREML = "    + std::to_string(whisper_has_coreml())     + " | ";
+    s += "OPENVINO = "  + std::to_string(whisper_has_openvino())   + " | ";

    return s.c_str();
 }