sync : ggml (#2001)

* sync : update scripts

* sync : ggml

* talk-llama : sync llama.cpp

* make : WHISPER_CUBLAS -> WHISPER_CUDA

* ci : try to fix sycl build

* talk-llama : fix make build
This commit is contained in:
Georgi Gerganov
2024-03-27 18:55:10 +02:00
committed by GitHub
parent 1558ec5a16
commit 2948c740a2
90 changed files with 15702 additions and 12476 deletions

View File

@ -103,6 +103,11 @@ extern "C" {
// check if the backend supports an operation
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
// check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
// these should be expensive operations with large batch sizes that may benefit from running on this backend
// even if the weight has to be copied from the CPU temporarily
bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
// (optional) event synchronization
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
void (*GGML_CALL event_free) (ggml_backend_event_t event);