From b5521fea1988e8110d9fdc1c17f0f98abff0346a Mon Sep 17 00:00:00 2001
From: William Tambellini
Date: Mon, 6 May 2024 11:12:14 -0700
Subject: [PATCH] Add an option to build without CUDA VMM (llama/7067)

Add an option to build ggml cuda without CUDA VMM
resolves https://github.com/ggerganov/llama.cpp/issues/6889
https://forums.developer.nvidia.com/t/potential-nvshmem-allocated-memory-performance-issue/275416/4
---
 ggml-cuda.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index fa56f952..8739baa2 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -113,7 +113,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
         CUdevice device;
         CU_CHECK(cuDeviceGet(&device, id));
         CU_CHECK(cuDeviceGetAttribute(&device_vmm, CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, device));
@@ -259,7 +259,7 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
 };
 
 // pool with virtual memory
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
 struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
     static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
 
@@ -356,7 +356,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
 #endif // !defined(GGML_USE_HIPBLAS)
 
 std::unique_ptr<ggml_cuda_pool> ggml_backend_cuda_context::new_pool_for_device(int device) {
-#if !defined(GGML_USE_HIPBLAS)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_CUDA_NO_VMM)
     if (ggml_cuda_info().devices[device].vmm) {
         return std::unique_ptr<ggml_cuda_pool>(new ggml_cuda_pool_vmm(device));
     }