CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562)

This commit is contained in:
Johannes Gäßler 2024-09-20 18:35:35 +02:00 committed by Georgi Gerganov
parent 138e20b697
commit c0761c95f5

View File

@@ -1,9 +1,13 @@
-#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
 #define USE_CUB
+#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11700
+
+#ifdef USE_CUB
 // On Windows CUB uses libraries with variables called CC_PASCAL which conflict with the define in common.cuh.
 // For this reason CUB must be included BEFORE anything else.
 #include <cub/cub.cuh>
 using namespace cub;
-#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#endif // USE_CUB

 #include "sumrows.cuh"
 #include "sum.cuh"
@@ -11,7 +15,7 @@ using namespace cub;

 #include <cstdint>

 void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream) {
-#if !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#ifdef USE_CUB
     size_t tmp_size = 0;
     DeviceReduce::Sum(nullptr, tmp_size, x, dst, ne, stream);
     ggml_cuda_pool_alloc<uint8_t> tmp_alloc(pool, tmp_size);
@@ -21,7 +25,7 @@ void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int
     // For AMD there is rocPRIM which could be used as a drop-in replacement via hipcub but this would require C++11 -> C++14.
     sum_rows_f32_cuda(x, dst, ne, 1, stream);
     GGML_UNUSED(pool);
-#endif // !defined(GGML_USE_HIPBLAS) && !defined(GGML_USE_MUSA)
+#endif // USE_CUB
 }

 void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {