ggml: Don't assert fail when tensor data changes (llama/13222)

The following scenario will cause an assertion failure in the graph
allocator:
 - Build and allocate a graph containing a tensor with a non-NULL data
   pointer
 - Build and allocate a new graph in which that same tensor's data
   pointer is NULL

Result:
ggml-alloc.c:819: GGML_ASSERT(talloc->buffer_id >= 0) failed

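This happens during revalidation: because the tensor has no data in the
current graph, the allocator assumes that it previously allocated memory
for it, and asserts that a buffer was assigned (buffer_id >= 0). In
reality the previous graph was different, so that assumption does not
hold. In this situation, we should do a full reallocation pass instead
of failing the assertion.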
This commit is contained in:
Jesse Gross 2025-05-01 13:46:10 -07:00 committed by Georgi Gerganov
parent 5a9ccde7da
commit 37ac0264ef
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@ -816,7 +816,10 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) { static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
size_t node_size = 0; size_t node_size = 0;
if (!node->data && !node->view_src) { if (!node->data && !node->view_src) {
GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API // If we previously had data but don't now then reallocate
if (talloc->buffer_id < 0) {
return false;
}
node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node); node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
} }
return talloc->size_max >= node_size; return talloc->size_max >= node_size;