Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-06-16 05:48:09 +00:00)
whisper : add OpenVINO support (#1037)
* openvino: use OpenVINO encoder inference
* openvino: add python script for OpenVINO model generation
* whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build
* Apply suggestions from code review
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* whisper: Fix compilation error
* whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures
* cmake: Add openvino-encoder as separate object target
* whisper : minor style fixes
* minor : indentation fixes

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
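For orientation, below is a minimal sketch of how a host application might wire the new functions together once the encoder IR has been produced by the conversion script. The model filename, device string, cache directory, buffer size, and tensor shapes are illustrative assumptions, not values taken from this commit; real code would derive the shapes from the loaded model's hyperparameters.

    #include "openvino/whisper-openvino-encoder.h"
    #include "ggml.h"

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // Scratch ggml context large enough to hold the two 2D f32 tensors below.
        std::vector<uint8_t> buf(64u * 1024 * 1024);
        struct ggml_init_params params = { buf.size(), buf.data(), /*no_alloc*/ false };
        struct ggml_context * gctx = ggml_init(params);

        // Illustrative shapes (roughly a base-sized model): 3000 mel frames x 80 bins in,
        // 1500 output positions x 512 states out.
        struct ggml_tensor * mel = ggml_new_tensor_2d(gctx, GGML_TYPE_F32, 3000,   80);
        struct ggml_tensor * out = ggml_new_tensor_2d(gctx, GGML_TYPE_F32,  512, 1500);

        // Compile the converted encoder IR for the chosen device; the cache directory lets
        // OpenVINO reuse device-specific blobs on later runs (see whisper_openvino_init below).
        struct whisper_openvino_context * octx =
            whisper_openvino_init("ggml-base-encoder-openvino.xml", "CPU", "openvino-cache");
        if (!octx) {
            fprintf(stderr, "failed to initialize the OpenVINO encoder\n");
            ggml_free(gctx);
            return 1;
        }

        // ... fill mel->data with the log-mel spectrogram here ...

        // Run the encoder; on success, out->data holds the encoder output.
        if (!whisper_openvino_encode(octx, mel, out)) {
            fprintf(stderr, "OpenVINO encode failed\n");
        }

        whisper_openvino_free(octx);
        ggml_free(gctx);
        return 0;
    }

Because whisper_openvino_encode wraps the ggml buffers directly as ov::Tensor objects (as seen in the diff below), no extra copies of the mel or output data are needed.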
openvino/whisper-openvino-encoder.cpp (new file, 108 lines)
@@ -0,0 +1,108 @@
#include "openvino/whisper-openvino-encoder.h"
#include "ggml.h"

#include <openvino/openvino.hpp>

#include <iostream>

struct whisper_openvino_context {
    ov::InferRequest inferRequest;
};
struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
                                                        const char* device,
                                                        const char* cache_dir)
{
    if (!path_model || !device) {
        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
        return nullptr;
    }

    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
            __func__, path_model, device, cache_dir ? cache_dir : "(not set)");

    whisper_openvino_context *context = new whisper_openvino_context;
    try {
        ov::Core core;

        if (cache_dir) {
            // enables caching of device-specific 'blobs' during the core.compile_model
            // routine. This speeds up calls to compile_model for successive runs.
            core.set_property(ov::cache_dir(cache_dir));
        }

        // Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
        std::shared_ptr<ov::Model> model = core.read_model(path_model);

        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
        auto compiledModel = core.compile_model(model, device);

        // From the compiled model object, create an infer request. This is the object we
        // will use later on to trigger inference execution.
        context->inferRequest = compiledModel.create_infer_request();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
        delete context;
        context = nullptr;
    }

    return context;
}
void whisper_openvino_free(struct whisper_openvino_context * ctx) {
    if (ctx) {
        delete ctx;
    }
}
int whisper_openvino_encode(
    whisper_openvino_context* ctx,
    ggml_tensor* mel,
    ggml_tensor* out) {

    if (!ctx || !mel || !out) {
        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
        return 0;
    }

    if (mel->n_dims != 2) {
        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
                __func__, mel->n_dims);
        return 0;
    }

    if (out->n_dims != 2) {
        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
                __func__, out->n_dims);
        return 0;
    }

    try {
        // wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
            ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
            ctx->inferRequest.set_input_tensor(input_tensor);
        }

        // wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
            ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
            ctx->inferRequest.set_output_tensor(out_tensor);
        }

        // run inference
        ctx->inferRequest.infer();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
        return 0;
    }

    return 1;
}