mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-19 07:18:07 +00:00
whisper : reduce memory usage during inference (#431)
* ggml : add "scratch" buffer support * ggml : support for scratch ring-buffer * ggml : bug fix in ggml_repeat() * ggml : error on scratch buffer overflow * whisper : use scratch buffers during inference (base model only) * whisper : update memory usage for all models * whisper : fix encoder memory usage * whisper : use whisper_context functions instead of macros * whisper : fix FF + remove it from README * ggml : reuse ggml_new_i32 * ggml : refactor the scratch buffer storage * whisper : reorder scratch buffers in the decoder * main : add option to disable temp fallback * Update README.md
This commit is contained in:
9
ggml.h
9
ggml.h
@ -301,6 +301,13 @@ struct ggml_cgraph {
|
||||
int64_t perf_time_us;
|
||||
};
|
||||
|
||||
// scratch buffer
|
||||
struct ggml_scratch {
|
||||
size_t offs;
|
||||
size_t size;
|
||||
void * data;
|
||||
};
|
||||
|
||||
struct ggml_init_params {
|
||||
// memory pool
|
||||
size_t mem_size; // bytes
|
||||
@ -327,6 +334,8 @@ void ggml_free(struct ggml_context * ctx);
|
||||
|
||||
size_t ggml_used_mem(const struct ggml_context * ctx);
|
||||
|
||||
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
||||
|
||||
struct ggml_tensor * ggml_new_tensor(
|
||||
struct ggml_context * ctx,
|
||||
enum ggml_type type,
|
||||
|
Reference in New Issue
Block a user