CUDA: fix race condition in MMQ stream-k fixup (llama/13299)

2025-05-09 20:13:14 +00:00 · 2025-05-04 14:16:39 +02:00 · 2025-05-04 14:16:39 +02:00 · 7fa8bb303f
commit 7fa8bb303f
parent 7564f5e6f1
1 changed file with 1 addition and 0 deletions
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
    for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
        ids_dst_shared[j] = ids_dst[col_low + j];
    }
    __syncthreads();
    const int offset_dst = it*mmq_y;
    dst += offset_dst;