From 1aca7b5c8aaab2585c2a1a6629e2381d7d528ea5 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Thu, 19 Jun 2025 09:15:42 +0200 Subject: [PATCH] Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer (llama/14249) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 8d62303a..1375bfeb 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer UNUSED(buft); } +static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + return vk_instance.devices[0]->suballocation_block_size; + + UNUSED(buft); +} + // Should be changed to return device-specific host buffer type // but that probably requires changes in llama.cpp ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { @@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { /* .get_name = */ ggml_backend_vk_host_buffer_type_name, /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size, /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, },