CUDA: fix CUDART_VERSION checks (llama/11821)

This commit is contained in:
Johannes Gäßler 2025-02-12 13:16:39 +01:00 committed by Georgi Gerganov
parent 91d02de332
commit 556f773d53
3 changed files with 8 additions and 6 deletions

View File

@@ -165,11 +165,11 @@ static const char * cu_get_error_str(CUresult err) {
#define CU_CHECK(err) CUDA_CHECK_GEN(err, CUDA_SUCCESS, cu_get_error_str)
#endif
-#if CUDART_VERSION >= 11100 || defined(GGML_USE_MUSA)
+#if CUDART_VERSION >= 11010 || defined(GGML_USE_MUSA)
#define GGML_CUDA_ASSUME(x) __builtin_assume(x)
#else
#define GGML_CUDA_ASSUME(x)
-#endif // CUDART_VERSION >= 11100
+#endif // CUDART_VERSION >= 11010
#ifdef GGML_CUDA_F16
typedef half dfloat; // dequantize float

View File

@@ -2840,7 +2840,7 @@ bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size) {
return false;
}
-#if CUDART_VERSION >= 11100 || defined(GGML_USE_MUSA)
+#if CUDART_VERSION >= 11010 || defined(GGML_USE_MUSA)
cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterPortable | cudaHostRegisterReadOnly);
if (err != cudaSuccess) {
// clear the error
@@ -2852,8 +2852,10 @@ bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size) {
}
return true;
#else
+GGML_UNUSED(buffer);
+GGML_UNUSED(size);
return false;
-#endif
+#endif // CUDART_VERSION >= 11010 || defined(GGML_USE_MUSA)
}
void ggml_backend_cuda_unregister_host_buffer(void * buffer) {

View File

@@ -1,6 +1,6 @@
-#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
+#if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
#define USE_CUB
-#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
+#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
#ifdef USE_CUB
#include <cub/cub.cuh>