CUDA: fix shared memory access condition for mmv (llama/10740)

This commit is contained in:
Johannes Gäßler 2024-12-09 20:07:12 +01:00 committed by Georgi Gerganov
parent 37df308a2a
commit ce2b75d2fb
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -57,7 +57,7 @@ static __global__ void mul_mat_vec(
if (block_size > WARP_SIZE) {
buf_iw[tid/WARP_SIZE] = sumf;
__syncthreads();
-if (tid > WARP_SIZE) {
+if (tid >= WARP_SIZE) {
return;
}
sumf = buf_iw[tid];