whisper : Metal and ggml-alloc support (#1270)

* metal : init

* whisper : factor out graph builds

* whisper : allocate encoder and decoder using ggml-alloc

* whisper : ggml-alloc is now supported

* whisper : CoreML support ggml-alloc

* build : fix ggml-alloc

* ios : update submodule

* extra : update sync-ggml.sh script to also sync ggml-alloc

* ci : see if this is causing the crash

* whisper : refactor ggml-alloc init

* whisper.android : try to fix build

* whisper : initial Metal version

* ci : try to debug vmem issue

* metal : decoder works on GPU!

* metal : add multi-decoder support

* ggml : fix ggml_nbytes (probably temp solution)

* metal : run "cross" step on the GPU

* whisper : remove ggml_repeat in the encoder

* whisper : offload the Encoder to Metal

* ggml : use simpler ggml_bytes() implementation

* ggml-alloc : try to make CI happy by reducing vram to 128GB

* whisper : add whisper_allocr to wrap ggml_allocr

* whisper : factor out alloc init in a function

* cmake : update to support Metal build

* whisper : add <functional> header

* objc : fix build (no Metal yet)

* ios : add Metal support

* swiftui : fix build

* metal : speed-up KQ multiplication

* metal : sync latest llama.cpp kernels

* readme : add Metal info

* ios : update submodule

* coreml : add code to toggle Core ML config (CPU, ANE, GPU)

* bench : fix timings by running a pre-heat

* bench : start benching the decoder

* whisper : add ggml_mul_mat_pad

* bench : fix uninitialized vars

* whisper : add comment for disabling mul-mat padding

* whisper : add description of ggml_mul_mat_pad

* whisper : clean-up ggml_mul_mat_pad

* metal : remove the "concurrent" flag

* bench : variable n_past

* ios : update SPM package
This commit is contained in:
Georgi Gerganov
2023-09-15 12:18:18 +03:00
committed by GitHub
parent 3fec2119e6
commit 93935980f8
18 changed files with 1855 additions and 1252 deletions

View File

@ -20,6 +20,7 @@
0AAC5DCC29539EB1003032C3 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DC929539EB0003032C3 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -Wno-shorten-64-to-32"; }; };
0AAC5DCE2953A05C003032C3 /* WhisperState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DCD2953A05C003032C3 /* WhisperState.swift */; };
0AAC5DD12953A394003032C3 /* LibWhisper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 0AAC5DD02953A394003032C3 /* LibWhisper.swift */; };
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 18AED47F2AB21F2B009D854F /* ggml-alloc.c */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
@ -41,6 +42,8 @@
0AAC5DCA29539EB0003032C3 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ggml.h; sourceTree = "<group>"; };
0AAC5DCD2953A05C003032C3 /* WhisperState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WhisperState.swift; sourceTree = "<group>"; };
0AAC5DD02953A394003032C3 /* LibWhisper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibWhisper.swift; sourceTree = "<group>"; };
18AED47F2AB21F2B009D854F /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "ggml-alloc.c"; sourceTree = "<group>"; };
18AED4802AB21F2B009D854F /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "ggml-alloc.h"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -124,6 +127,8 @@
0AAC5DC529539E89003032C3 /* whisper.cpp */ = {
isa = PBXGroup;
children = (
18AED47F2AB21F2B009D854F /* ggml-alloc.c */,
18AED4802AB21F2B009D854F /* ggml-alloc.h */,
0AAC5DC929539EB0003032C3 /* ggml.c */,
0AAC5DCA29539EB0003032C3 /* ggml.h */,
0AAC5DC729539EB0003032C3 /* whisper.cpp */,
@ -242,6 +247,7 @@
0AA7514C2953B569001EE061 /* RiffWaveUtils.swift in Sources */,
0AAC5DCB29539EB1003032C3 /* whisper.cpp in Sources */,
0AA7514E2953D958001EE061 /* Recorder.swift in Sources */,
18AED4812AB21F2B009D854F /* ggml-alloc.c in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -369,7 +375,7 @@
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
DEVELOPMENT_TEAM = 3TZ9BM962G;
DEVELOPMENT_TEAM = P8JZH34X63;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
@ -410,7 +416,7 @@
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"whisper.swiftui.demo/Supporting files/Preview Content\"";
DEVELOPMENT_TEAM = 3TZ9BM962G;
DEVELOPMENT_TEAM = P8JZH34X63;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;