mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-05-09 20:13:14 +00:00
CUDA: fix race condition in MMQ stream-k fixup (llama/13299)
This commit is contained in:
parent
7564f5e6f1
commit
7fa8bb303f
@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
|
|||||||
for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
|
for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
|
||||||
ids_dst_shared[j] = ids_dst[col_low + j];
|
ids_dst_shared[j] = ids_dst[col_low + j];
|
||||||
}
|
}
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
const int offset_dst = it*mmq_y;
|
const int offset_dst = it*mmq_y;
|
||||||
dst += offset_dst;
|
dst += offset_dst;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user