whisper : add OpenVINO support (#1037)

* openvino: use OpenVINO encoder inference * openvino: add python script for OpenVINO model generation * whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build * Apply suggestions from code review Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * whisper: Fix compilation error * whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures * cmake: Add openvino-encoder as separate object target * whisper : minor style fixes * minor : indentation fixes --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-06-12 20:18:08 +00:00 · 2023-07-04 08:56:11 -04:00
parent 176d7e4e7b
commit 62b81276e0
8 changed files with 367 additions and 3 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -95,6 +95,8 @@ struct whisper_params {
    // [TDRZ] speaker turn string
    std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line

+    std::string openvino_encode_device = "CPU";
+
    std::vector<std::string> fname_inp = {};
    std::vector<std::string> fname_out = {};
 };
@ -155,6 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (                  arg == "--prompt")          { params.prompt          = argv[++i]; }
        else if (arg == "-m"    || arg == "--model")           { params.model           = argv[++i]; }
        else if (arg == "-f"    || arg == "--file")            { params.fname_inp.emplace_back(argv[++i]); }
+        else if (arg == "-oved" || arg == "--ov-e-device")     { params.openvino_encode_device = argv[++i]; }
        else {
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            whisper_print_usage(argc, argv, params);
@ -207,6 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "             --prompt PROMPT     [%-7s] initial prompt\n",                                 params.prompt.c_str());
    fprintf(stderr, "  -m FNAME,  --model FNAME       [%-7s] model path\n",                                     params.model.c_str());
    fprintf(stderr, "  -f FNAME,  --file FNAME        [%-7s] input WAV file path\n",                            "");
+    fprintf(stderr, "  -oved D,   --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n",  params.openvino_encode_device.c_str());
    fprintf(stderr, "\n");
 }

@ -809,6 +813,9 @@ int main(int argc, char ** argv) {
        return 3;
    }

+    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
+
    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
        const auto fname_inp = params.fname_inp[f];
 		const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];