ci : enable -Werror for CUDA builds (llama/5579)

* cmake : pass -Werror through -Xcompiler

ggml-ci

* make, cmake : treat CUDA warnings as errors

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-02-19 14:45:41 +02:00
parent 5ec1e0edfa
commit a4d8f9d559
No known key found for this signature in database
GPG Key ID: BF970631944C16B7

View File

@@ -651,18 +651,18 @@ static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) {
return a; return a;
} }
static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) { //static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
#pragma unroll //#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) { // for (int mask = 16; mask > 0; mask >>= 1) {
a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32)); // a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, mask, 32));
} // }
return a; // return a;
#else //#else
(void) a; // (void) a;
NO_DEVICE_CODE; // NO_DEVICE_CODE;
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL
} //}
static __device__ __forceinline__ float warp_reduce_max(float x) { static __device__ __forceinline__ float warp_reduce_max(float x) {
#pragma unroll #pragma unroll
@@ -672,18 +672,18 @@ static __device__ __forceinline__ float warp_reduce_max(float x) {
return x; return x;
} }
static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { //static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX //#if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
#pragma unroll //#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) { // for (int mask = 16; mask > 0; mask >>= 1) {
x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32)); // x = __hmax2(x, __shfl_xor_sync(0xffffffff, x, mask, 32));
} // }
return x; // return x;
#else //#else
(void) x; // (void) x;
NO_DEVICE_CODE; // NO_DEVICE_CODE;
#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX //#endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_PASCAL && CUDART_VERSION >= CUDART_HMAX
} //}
static __device__ __forceinline__ float op_repeat(const float a, const float b) { static __device__ __forceinline__ float op_repeat(const float a, const float b) {
return b; return b;
@@ -4641,10 +4641,12 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f; const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2); return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
#else #else
(void) ksigns64;
assert(false); assert(false);
return 0.f; return 0.f;
#endif #endif
#else #else
(void) ksigns64;
assert(false); assert(false);
return 0.f; return 0.f;
#endif #endif