mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-21 16:09:55 +00:00
CUDA: fix partial offloading for ne0 % 256 != 0 (llama/8572)
This commit is contained in:
committed by
Georgi Gerganov
parent
fb6a835938
commit
a8ab3abe09
@ -776,6 +776,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
|
||||
return false;
|
||||
}
|
||||
ggml_backend_buffer_set_usage(galloc->buffers[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user