cuda : fix tensor size calculation for non-split buffer (llama/5145)

2025-06-16 05:48:09 +00:00 · 2024-01-26 18:59:43 +01:00
parent c65edd5b64
commit 0878ab7c15
2 changed files with 8 additions and 15 deletions
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -30,7 +30,9 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
 GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
    // get_alloc_size is optional, defaults to ggml_nbytes
    if (buft->iface.get_alloc_size) {
-        return buft->iface.get_alloc_size(buft, tensor);
+        size_t size = buft->iface.get_alloc_size(buft, tensor);
+        assert(size >= ggml_nbytes(tensor));
+        return size;
    }
    return ggml_nbytes(tensor);
 }