mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-01 15:00:44 +00:00
SYCL: Avoid using SYCL-Graph for unsupported nodes (llama/13587)
Currently, on a CUDA backend to SYCL, running
`GGML_SYCL_DISABLE_GRAPH=0 ./bin/test-backend-ops -b SYCL0` hits
two operations that throw an exception because of blocking
waits during queue recording.
* `-o CONCAT` : Use of blocking waits on a queue that's being recorded https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/concat.cpp#L185-L187
* `-o MUL_MAT_ID`: Blocking wait on a recording queue for a copy to host memory https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/ggml-sycl.cpp#L3072-L3074
We've noticed that `ggml-cuda.cu` has the
`check_node_graph_compatibility_and_refresh_copy_ops` method
(`ggml/src/ggml-cuda/ggml-cuda.cu`, line 2458 at commit 39e73ae0d6)
for checking whether a graph can be used, even if enabled. I've taken a
similar approach in this PR by adding a method to `ggml-sycl.cpp` that checks
whether a graph can be used for the operations, even if the user has asked
for it to be enabled.
This commit is contained in:
parent
316600e8ee
commit
730a00be8a
@ -3809,11 +3809,43 @@ static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * syc
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef GGML_SYCL_GRAPH
|
||||
// Decides whether the compute graph `cgraph` may be recorded into a SYCL graph.
//
// Returns false (after logging the reason at INFO level) when more than one
// SYCL device is reported, or when any node in `cgraph` is an operation whose
// implementation performs a blocking host wait. Returns true otherwise.
static bool check_graph_compatibility(ggml_cgraph * cgraph) {
    // A sycl_ex::command_graph object can only be created for a single device
    const bool multi_device = ggml_sycl_info().device_count > 1;
    if (multi_device) {
        GGML_LOG_INFO("%s: disabling SYCL graphs due to multiple devices\n", __func__);
        return false;
    }

    for (int idx = 0; idx < cgraph->n_nodes; ++idx) {
        const ggml_op node_op = cgraph->nodes[idx]->op;

        // Operations that cannot be recorded:
        //  - GGML_OP_CONCAT: ggml_sycl_op_concat() does a blocking host wait after
        //    memcpy operations, but wait() can't be called on the events returned
        //    by a queue recording to a graph.
        //  - GGML_OP_MUL_MAT_ID: ggml_sycl_mul_mat_id() does a blocking host wait on
        //    the sycl queue after submitting a memcpy operation, but wait() can't be
        //    called on a queue that is recording to a graph.
        const bool needs_host_wait = (node_op == GGML_OP_CONCAT) || (node_op == GGML_OP_MUL_MAT_ID);
        if (!needs_host_wait) {
            continue;
        }

        GGML_LOG_INFO("%s: disabling SYCL graphs due to unsupported node type %s\n", __func__,
                      ggml_op_name(node_op));
        return false;
    }

    return true;
}
|
||||
#endif
|
||||
|
||||
static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
||||
auto * sycl_ctx = static_cast<ggml_backend_sycl_context *>(backend->context);
|
||||
|
||||
#ifdef GGML_SYCL_GRAPH
|
||||
if (!g_ggml_sycl_disable_graph) {
|
||||
bool use_sycl_graph = !g_ggml_sycl_disable_graph && check_graph_compatibility(cgraph);
|
||||
if (use_sycl_graph) {
|
||||
const bool graph_support = dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_limited_graph);
|
||||
if (!graph_support) {
|
||||
GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);
|
||||
|
Loading…
x
Reference in New Issue
Block a user