Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer (llama/14249)

This commit is contained in:
0cc4m
2025-06-19 09:15:42 +02:00
committed by Georgi Gerganov
parent b251d739ad
commit 1aca7b5c8a

View File

@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
UNUSED(buft); UNUSED(buft);
} }
// Maximum size of a single buffer for the Vulkan host buffer type.
//
// The limit is taken from the suballocation block size of the first device
// in vk_instance (index 0), regardless of which device will actually use the
// buffer. Supplying this callback replaces the previous NULL entry, which
// defaulted to SIZE_MAX.
//
// buft: the buffer type being queried; not consulted.
// Returns: vk_instance.devices[0]->suballocation_block_size, as a size_t.
static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    UNUSED(buft);

    // NOTE(review): assumes at least one device is present in vk_instance
    // by the time this is called — confirm against the initialization path.
    const size_t host_max_size = vk_instance.devices[0]->suballocation_block_size;
    return host_max_size;
}
// Should be changed to return device-specific host buffer type // Should be changed to return device-specific host buffer type
// but that probably requires changes in llama.cpp // but that probably requires changes in llama.cpp
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
/* .get_name = */ ggml_backend_vk_host_buffer_type_name, /* .get_name = */ ggml_backend_vk_host_buffer_type_name,
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer, /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment, /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
/* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
}, },