whisper : add OpenVINO support (#1037)

* openvino: use OpenVINO encoder inference

* openvino: add python script for OpenVINO model generation

* whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* whisper: Fix compilation error

* whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures

* cmake: Add openvino-encoder as separate object target

* whisper : minor style fixes

* minor : indentation fixes

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Ryan Metcalfe
2023-07-04 08:56:11 -04:00
committed by GitHub
parent 176d7e4e7b
commit 62b81276e0
8 changed files with 367 additions and 3 deletions

View File

@ -95,6 +95,8 @@ struct whisper_params {
// [TDRZ] speaker turn string
std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
std::string openvino_encode_device = "CPU";
std::vector<std::string> fname_inp = {};
std::vector<std::string> fname_out = {};
};
@ -155,6 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
else if ( arg == "--prompt") { params.prompt = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
@ -207,6 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, "\n");
}
@ -809,6 +813,9 @@ int main(int argc, char ** argv) {
return 3;
}
// initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
const auto fname_inp = params.fname_inp[f];
const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];