CUDA: fix partial offloading for ne0 % 256 != 0 (llama/8572)

This commit is contained in:
Johannes Gäßler
2024-07-18 23:48:47 +02:00
committed by Georgi Gerganov
parent fb6a835938
commit a8ab3abe09
4 changed files with 29 additions and 15 deletions

View File

@@ -776,6 +776,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
return false;
}
ggml_backend_buffer_set_usage(galloc->buffers[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
}
}