From 4ea599afdf86fbb5a24cceda803b42540c826b25 Mon Sep 17 00:00:00 2001 From: Anton Mitkov Date: Fri, 13 Jun 2025 08:51:39 +0100 Subject: [PATCH] sycl: Adding additional cpy dbg print output (llama/14034) --- ggml/src/ggml-sycl/common.hpp | 41 +++++++++++++++----------------- ggml/src/ggml-sycl/cpy.cpp | 3 +-- ggml/src/ggml-sycl/ggml-sycl.cpp | 26 ++++++++++---------- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 4f17699a..753b4af1 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -513,9 +513,9 @@ constexpr size_t ceil_div(const size_t m, const size_t n) { bool gpu_has_xmx(sycl::device &dev); -template void debug_print_array(const std::string & prefix, const T array[N]) { +template std::string debug_get_array_str(const std::string & prefix, const T array[N]) { if (LIKELY(!g_ggml_sycl_debug)) { - return; + return ""; } std::stringstream ss; ss << prefix << "=["; @@ -526,29 +526,26 @@ template void debug_print_array(const std::string & prefix, con ss << array[N - 1]; } ss << "]"; - GGML_SYCL_DEBUG("%s", ss.str().c_str()); + return ss.str(); } -inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor, - const std::string & suffix = "") { - if (LIKELY(!g_ggml_sycl_debug)) { - return; - } - GGML_SYCL_DEBUG("%s=", prefix.c_str()); +inline std::string debug_get_tensor_str(const std::string &prefix, + const ggml_tensor *tensor, const std::string &suffix = "") { + std::stringstream ss; + if (LIKELY(!g_ggml_sycl_debug)) { return ss.str(); } + ss << prefix.c_str() << "="; if (tensor) { - GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type)); - debug_print_array(";ne", tensor->ne); - debug_print_array(";nb", tensor->nb); - if (!ggml_is_contiguous(tensor)) { - GGML_SYCL_DEBUG(";strided"); - } - if (ggml_is_permuted(tensor)) { - GGML_SYCL_DEBUG(";permuted"); - } + ss << "'" << tensor->name << "':type=" << ggml_type_name(tensor->type); + ss << debug_get_array_str(";ne", tensor->ne); + ss << debug_get_array_str(";nb", tensor->nb); + + if (!ggml_is_contiguous(tensor)) { ss << ";strided"; } + if (ggml_is_permuted(tensor)) { ss << ";permuted"; } } else { - GGML_SYCL_DEBUG("nullptr"); + ss << "nullptr"; } - GGML_SYCL_DEBUG("%s", suffix.c_str()); + ss << suffix; + return ss.str(); } // Use scope_op_debug_print to log operations coming from running a model @@ -564,10 +561,10 @@ struct scope_op_debug_print { return; } GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data()); - debug_print_tensor(" dst", dst); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" dst", dst).c_str()); if (dst) { for (std::size_t i = 0; i < num_src; ++i) { - debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str("\tsrc" + std::to_string(i), dst->src[i]).c_str()); } } GGML_SYCL_DEBUG("%s\n", suffix.data()); diff --git a/ggml/src/ggml-sycl/cpy.cpp b/ggml/src/ggml-sycl/cpy.cpp index 56373b4d..bec13714 100644 --- a/ggml/src/ggml-sycl/cpy.cpp +++ b/ggml/src/ggml-sycl/cpy.cpp @@ -723,8 +723,7 @@ static void ggml_cpy_q4_1_q4_1(const char * cx, char * cdst, const int ne, const void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try { // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field - scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, - std::string(" src0 type=") + ggml_type_name(src0->type)); + scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, debug_get_tensor_str("\tsrc0", src0)); const int64_t ne = ggml_nelements(src0); GGML_ASSERT(ne == ggml_nelements(src1)); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index feb30304..4b761036 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -347,7 +347,7 @@ static enum ggml_status ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor, "\n"); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str()); ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context; if (tensor->view_src != NULL) { @@ -385,7 +385,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, const void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; ggml_sycl_set_device(ctx->device); @@ -413,7 +413,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer, void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; @@ -444,8 +444,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer, ggml_tensor *dst) try { bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer); GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": dst=", dst); - debug_print_tensor(" src=", src); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str()); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str()); GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported); if (is_cpy_supported) { ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context; @@ -525,7 +525,7 @@ catch (sycl::exception const &exc) { static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value); ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context; SYCL_CHECK(ggml_sycl_set_device(ctx->device)); @@ -805,7 +805,7 @@ static enum ggml_status ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor, "\n"); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str()); GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context; @@ -891,7 +891,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); // split tensors must always be set in their entirety at once GGML_ASSERT(offset == 0); @@ -947,7 +947,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor, void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); // split tensors must always be set in their entirety at once GGML_ASSERT(offset == 0); @@ -3863,7 +3863,7 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend, const void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; @@ -3884,7 +3884,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend, void *data, size_t offset, size_t size) try { GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": tensor=", tensor); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str()); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; @@ -3907,8 +3907,8 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend, bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer); GGML_SYCL_DEBUG("[SYCL] call %s", __func__); - debug_print_tensor(": dst=", dst); - debug_print_tensor(" src=", src); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str()); + GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str()); GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported); if (is_cpy_supported) { /*