mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-16 05:48:09 +00:00
cuda : fix tensor size calculation for non-split buffer (llama/5145)
This commit is contained in:
@ -30,7 +30,9 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||
// Return the number of bytes to reserve in a buffer of type `buft` for `tensor`.
//
// The buffer type may provide an optional `get_alloc_size` callback. When it is
// set, its result is returned; otherwise the size falls back to
// ggml_nbytes(tensor).
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
    // get_alloc_size is optional, defaults to ggml_nbytes
    if (buft->iface.get_alloc_size) {
        size_t size = buft->iface.get_alloc_size(buft, tensor);
        // sanity check: the callback must not report less than the tensor's
        // own byte size (check is compiled out when NDEBUG is defined)
        assert(size >= ggml_nbytes(tensor));
        return size;
    }
    return ggml_nbytes(tensor);
}
|
||||
|
Reference in New Issue
Block a user