mirror of https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-23 22:42:22 +00:00
93935980f8

* metal : init
* whisper : factor out graph builds
* whisper : allocate encoder and decoder using ggml-alloc
* whisper : ggml-alloc is now supported
* whisper : CoreML support ggml-alloc
* build : fix ggml-alloc
* ios : update submodule
* extra : update sync-ggml.sh script to also sync ggml-alloc
* ci : see if this is causing the crash
* whisper : refactor ggml-alloc init
* whisper.android : try to fix build
* whisper : initial Metal version
* ci : try to debug vmem issue
* metal : decoder works on GPU!
* metal : add multi-decoder support
* ggml : fix ggml_nbytes (probably temp solution)
* metal : run "cross" step on the GPU
* whisper : remove ggml_repeat in the encoder
* whisper : offload the Encoder to Metal
* ggml : use simpler ggml_bytes() implementation
* ggml-alloc : try to make CI happy by reducing vram to 128GB
* whisper : add whisper_allocr to wrap ggml_allocr
* whisper : factor out alloc init in a function
* cmake : update to support Metal build
* whisper : add <functional> header
* objc : fix build (no Metal yet)
* ios : add Metal support
* swiftui : fix build
* metal : speed-up KQ multiplication
* metal : sync latest llama.cpp kernels
* readme : add Metal info
* ios : update submodule
* coreml : add code to toggle Core ML config (CPU, ANE, GPU)
* bench : fix timings by running a pre-heat
* bench : start benching the decoder
* whisper : add ggml_mul_mat_pad
* bench : fix uninitialized vars
* whisper : add comment for disabling mul-mat padding
* whisper : add description of ggml_mul_mat_pad
* whisper : clean-up ggml_mul_mat_pad
* metal : remove the "concurrent" flag
* bench : variable n_past
* ios : update SPM package
72 lines
2.1 KiB
Objective-C++
#if !__has_feature(objc_arc)
#error This file must be compiled with automatic reference counting enabled (-fobjc-arc)
#endif

#import "whisper-encoder.h"
#import "whisper-encoder-impl.h"

#import <CoreML/CoreML.h>

#include <stdlib.h>
#include <string.h> // for memcpy

#if __cplusplus
extern "C" {
#endif

struct whisper_coreml_context {
    // opaque handle: a CFBridgingRetain'ed whisper_encoder_impl instance
    const void * data;
};

struct whisper_coreml_context * whisper_coreml_init(const char * path_model) {
    NSString * path_model_str = [[NSString alloc] initWithUTF8String:path_model];

    NSURL * url_model = [NSURL fileURLWithPath: path_model_str];

    // select which device to run the Core ML model on
    MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
    config.computeUnits = MLComputeUnitsCPUAndGPU;
    //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
    //config.computeUnits = MLComputeUnitsAll;

    // CFBridgingRetain keeps the ObjC model alive beyond this ARC scope;
    // whisper_coreml_free releases it with CFRelease
    const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);

    if (data == NULL) {
        return NULL;
    }

    whisper_coreml_context * ctx = new whisper_coreml_context;

    ctx->data = data;

    return ctx;
}

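// A hedged sketch (not part of the original file): the same model load, but
// surfacing the NSError that the generated initializer can report instead of
// passing error:nil. Guarded with #if 0 so it does not affect the build.
#if 0
static const void * whisper_coreml_load_checked(NSURL * url_model, MLModelConfiguration * config) {
    NSError * error = nil;

    whisper_encoder_impl * model = [[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:&error];
    if (model == nil) {
        NSLog(@"whisper_coreml_init: failed to load model: %@", error);
        return NULL;
    }

    // transfer ownership out of ARC, exactly as whisper_coreml_init does
    return CFBridgingRetain(model);
}
#endif
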
void whisper_coreml_free(struct whisper_coreml_context * ctx) {
    // balances the CFBridgingRetain in whisper_coreml_init
    CFRelease(ctx->data);
    delete ctx;
}

void whisper_coreml_encode(
        const whisper_coreml_context * ctx,
                               float * mel,
                               float * out) {
    // wrap the caller's log-mel buffer without copying; the shape
    // (1, 80, 3000) is batch x mel bins x frames (30 s of audio), and the
    // strides (240000, 3000, 1) describe that same buffer laid out contiguously
    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
                                           shape: @[@1, @80, @3000]
                                        dataType: MLMultiArrayDataTypeFloat32
                                         strides: @[@(240000), @(3000), @1]
                                     deallocator: nil
                                           error: nil
    ];

    @autoreleasepool {
        whisper_encoder_implOutput * outCoreML = [(__bridge id) ctx->data predictionFromLogmel_data:inMultiArray error:nil];

        // copy the full encoder output; the caller must provide an `out`
        // buffer large enough for outCoreML.output.count floats
        memcpy(out, outCoreML.output.dataPointer, outCoreML.output.count * sizeof(float));
    }
}

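// A hedged sketch (not part of the original file): the same encode path with
// the error parameters actually checked, since error:nil hides both a failed
// MLMultiArray wrap and a failed prediction. Guarded with #if 0 so it does
// not affect the build.
#if 0
static bool whisper_coreml_encode_checked(const whisper_coreml_context * ctx, float * mel, float * out) {
    NSError * error = nil;

    MLMultiArray * inMultiArray = [
        [MLMultiArray alloc] initWithDataPointer: mel
                                           shape: @[@1, @80, @3000]
                                        dataType: MLMultiArrayDataTypeFloat32
                                         strides: @[@(240000), @(3000), @1]
                                     deallocator: nil
                                           error: &error
    ];
    if (inMultiArray == nil) {
        NSLog(@"whisper_coreml_encode: invalid input array: %@", error);
        return false;
    }

    @autoreleasepool {
        whisper_encoder_implOutput * outCoreML = [(__bridge id) ctx->data predictionFromLogmel_data:inMultiArray error:&error];
        if (outCoreML == nil) {
            NSLog(@"whisper_coreml_encode: prediction failed: %@", error);
            return false;
        }

        memcpy(out, outCoreML.output.dataPointer, outCoreML.output.count * sizeof(float));
    }

    return true;
}
#endif
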
#if __cplusplus
}
#endif
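
// Usage sketch (illustrative, not part of the original file): a hypothetical
// caller that pushes one 30-second window through the Core ML encoder. The
// model path and the output size (512 x 1500 floats, i.e. n_audio_state x
// n_audio_ctx for the base model) are assumptions; other model sizes differ.
//
//     #include "whisper-encoder.h"
//     #include <stdlib.h>
//
//     int main(void) {
//         struct whisper_coreml_context * ctx =
//             whisper_coreml_init("models/ggml-base.en-encoder.mlmodelc");
//         if (ctx == NULL) {
//             return 1;
//         }
//
//         float * mel = (float *) calloc(80 * 3000,  sizeof(float)); // log-mel input
//         float * out = (float *) malloc(512 * 1500 * sizeof(float)); // encoder output
//
//         whisper_coreml_encode(ctx, mel, out);
//
//         free(mel);
//         free(out);
//         whisper_coreml_free(ctx);
//
//         return 0;
//     }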