Add OpenCL add kernel (llama/5151)

* Add OpenCL add kernel

* Put add kernel into different string to stay within MSVC string length limit, disable float16 support due to bad results
This commit is contained in:
0cc4m
2024-01-26 23:07:32 +01:00
committed by Georgi Gerganov
parent 0878ab7c15
commit 6061241292
3 changed files with 96 additions and 3 deletions

11
ggml.c
View File

@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
const int ith = params->ith;
const int nth = params->nth;
#ifdef GGML_USE_CLBLAST
if (src1->backend == GGML_BACKEND_GPU) {
// TODO: OpenCL kernel support full broadcast
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
if (ith == 0) {
ggml_cl_add(src0, src1, dst);
}
return;
}
#endif
const int nr = ggml_nrows(src0);
GGML_TENSOR_BINARY_OP_LOCALS