SYCL: Add gated linear attention kernel (llama/11175)

* SYCL: Add gated linear attention kernel

* gla.hpp: add a space at the end of the file

* gla: Put the barrier inside the main logic loop (see the sketch after the first diff hunk below)
Author: Akarshan Biswas
Date: 2025-01-15 08:50:17 +05:30
Committed by: Georgi Gerganov
Parent: 8e0143e205
Commit: 9700cfb0a3
4 changed files with 118 additions and 0 deletions


@@ -4040,6 +4040,9 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens
         case GGML_OP_RWKV_WKV6:
             ggml_sycl_op_rwkv_wkv6(ctx, dst);
             break;
+        case GGML_OP_GATED_LINEAR_ATTN:
+            ggml_sycl_op_gated_linear_attn(ctx, dst);
+            break;
         default:
             return false;
     }
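
The new case routes GGML_OP_GATED_LINEAR_ATTN to ggml_sycl_op_gated_linear_attn, whose kernel lives in the new gla.hpp named by the second commit bullet; this excerpt shows only the dispatch changes. As a rough illustration of the recurrence and of the barrier placement the third bullet describes, here is a minimal SYCL sketch, not the committed kernel: one work-group per head, one work-item per channel, with the work-group barrier inside the per-time-step loop so the staged q/k/g tile is complete before any item reads its neighbours' channels. The fixed head size, tensor layout, and all names are assumptions for illustration only.

#include <sycl/sycl.hpp>

// Hypothetical fixed head size for the sketch; a real kernel would take it
// from the tensor shape (64 and 128 are common gated-linear-attention sizes).
constexpr int HEAD = 64;

// Recurrence sketched per head, with h a HEAD x HEAD state matrix:
//   h_t[j][i] = g_t[j] * h_{t-1}[j][i] + k_t[j] * v_t[i]
//   o_t[i]    = scale * sum_j q_t[j] * h_t[j][i]
// Work-item i owns column i of h; q/k/g for the current step are staged in
// local memory because item i reads every channel j of them.
static void gla_head_sketch(sycl::queue & qd, int T, float scale,
                            const float * q, const float * k,   // USM device
                            const float * v, const float * g,   // pointers,
                            float * out) {                      // shape [T][HEAD]
    qd.submit([&](sycl::handler & cgh) {
        sycl::local_accessor<float, 1> s_q(sycl::range<1>(HEAD), cgh);
        sycl::local_accessor<float, 1> s_k(sycl::range<1>(HEAD), cgh);
        sycl::local_accessor<float, 1> s_g(sycl::range<1>(HEAD), cgh);

        cgh.parallel_for(sycl::nd_range<1>(sycl::range<1>(HEAD), sycl::range<1>(HEAD)),
                         [=](sycl::nd_item<1> it) {
            const int i = it.get_local_id(0);   // channel owned by this item
            float state[HEAD] = {};             // column i of h, in registers

            for (int t = 0; t < T; ++t) {
                // stage this step's vectors into local memory
                s_q[i] = q[t * HEAD + i];
                s_k[i] = k[t * HEAD + i];
                s_g[i] = g[t * HEAD + i];
                // the barrier sits inside the main logic loop: the tile must
                // be fully written before any item reads channels j != i
                it.barrier(sycl::access::fence_space::local_space);

                const float v_i = v[t * HEAD + i];
                float y = 0.0f;
                for (int j = 0; j < HEAD; ++j) {
                    state[j] = state[j] * s_g[j] + s_k[j] * v_i;
                    y       += s_q[j] * state[j];
                }
                out[t * HEAD + i] = y * scale;

                // second barrier: stop a fast item from restaging the tile
                // for step t+1 while a slower item is still reading step t
                it.barrier(sycl::access::fence_space::local_space);
            }
        });
    }).wait();
}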
@@ -4507,6 +4510,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_LEAKY_RELU:
         case GGML_OP_TIMESTEP_EMBEDDING:
         case GGML_OP_RWKV_WKV6:
+        case GGML_OP_GATED_LINEAR_ATTN:
             return true;
         default:
             return false;
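
Listing the op in ggml_backend_sycl_device_supports_op is what lets ggml's scheduler keep gated-linear-attention nodes on the SYCL device rather than falling back to another backend. A hedged sketch of how that whitelist is observed through the public device API (dev and node are assumed to be supplied by the surrounding application):

#include "ggml.h"
#include "ggml-backend.h"

// After this commit, a GGML_OP_GATED_LINEAR_ATTN node queried against a SYCL
// device reaches the whitelist above and returns true, so the scheduler can
// place it on the GPU; previously the default branch returned false and the
// node fell back to another backend.
static bool stays_on_device(ggml_backend_dev_t dev, const struct ggml_tensor * node) {
    return ggml_backend_dev_supports_op(dev, node);
}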