mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-22 08:30:07 +00:00
Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer (llama/14249)
This commit is contained in:
@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
|
|||||||
UNUSED(buft);
|
UNUSED(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Max-size callback for the Vulkan host buffer type (installed in the
// .get_max_size slot of the host buffer type interface).
//
// The returned limit is device 0's suballocation_block_size. `buft` is not
// consulted, so the same value is reported for whatever handle is passed in.
static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    UNUSED(buft);

    const size_t host_max_size = vk_instance.devices[0]->suballocation_block_size;
    return host_max_size;
}
|
||||||
|
|
||||||
// Should be changed to return device-specific host buffer type
|
// Should be changed to return device-specific host buffer type
|
||||||
// but that probably requires changes in llama.cpp
|
// but that probably requires changes in llama.cpp
|
||||||
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
||||||
@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
|||||||
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
||||||
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
|
||||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||||
},
|
},
|
||||||
|
Reference in New Issue
Block a user