From d41cf26a0ff53c46bd437da7ba5bd538c6086d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 6 May 2025 23:35:51 +0200 Subject: [PATCH] CUDA: mix virt/real CUDA archs for GGML_NATIVE=OFF (llama/13135) --- ggml/src/ggml-cuda/CMakeLists.txt | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index f3cfdeae..969a178f 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -12,12 +12,30 @@ if (CUDAToolkit_FOUND) # 61 == Pascal, __dp4a instruction (per-byte integer dot product) # 70 == V100, FP16 tensor cores # 75 == Turing, int8 tensor cores + # 80 == Ampere, asynchronous data loading, faster tensor core instructions + # 86 == RTX 3000, needs CUDA v11.1 + # 89 == RTX 4000, needs CUDA v11.8 + # + # XX-virtual == compile CUDA code as PTX, do JIT compilation to binary code on first run + # XX-real == compile CUDA code as device code for this specific architecture + # no suffix == compile as both PTX and device code + # + # The default behavior for a non-native is to build virtual architectures as needed to cover all features needed + # for best performance and to also build real architectures for the most commonly used GPUs. if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") set(CMAKE_CUDA_ARCHITECTURES "native") elseif(GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75;80") + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8") + set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real") + else() + set(CMAKE_CUDA_ARCHITECTURES "60-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real") + endif() else() - set(CMAKE_CUDA_ARCHITECTURES "50;61;70;75;80") + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8") + set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real") + else() + set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real") + endif() endif() endif() message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")