sycl: Add more debug prints (llama/13640)
commit 25e27904ca
parent 474f7be8b6
committed by Georgi Gerganov
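The change follows a single pattern throughout: each SYCL backend entry point now logs its call site with GGML_SYCL_DEBUG and describes the tensors it touches with debug_print_tensor, while the per-op wrappers move to a scoped logger, scope_op_debug_print. For orientation, below is a minimal sketch of what a runtime-gated debug macro and a tensor printer with this call shape could look like; the names GGML_SYCL_DEBUG and debug_print_tensor are taken from the diff, but the bodies, the environment-variable gate, and the output format are assumptions for illustration, not the actual ggml-sycl helpers.

// Sketch only, assuming ggml.h from this repository; not the real ggml-sycl implementation.
#include "ggml.h"

#include <cstdio>
#include <cstdlib>

static int sycl_debug_enabled() {
    // Assumption: prints are gated on a GGML_SYCL_DEBUG environment variable, read once.
    static const int enabled = [] {
        const char * v = std::getenv("GGML_SYCL_DEBUG");
        return v ? std::atoi(v) : 0;
    }();
    return enabled;
}

#define GGML_SYCL_DEBUG(...)              \
    do {                                  \
        if (sycl_debug_enabled()) {       \
            fprintf(stderr, __VA_ARGS__); \
        }                                 \
    } while (0)

// Same call shape as in the hunks below: prefix, tensor, optional suffix.
static void debug_print_tensor(const char * prefix, const ggml_tensor * tensor, const char * suffix = "") {
    if (!sycl_debug_enabled() || tensor == nullptr) {
        return;
    }
    fprintf(stderr, "%s'%s' %s [%ld, %ld, %ld, %ld]%s", prefix, tensor->name,
            ggml_type_name(tensor->type), (long) tensor->ne[0], (long) tensor->ne[1],
            (long) tensor->ne[2], (long) tensor->ne[3], suffix);
}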
@@ -346,6 +346,8 @@ static void * ggml_backend_sycl_buffer_get_base(ggml_backend_buffer_t buffer) {
static enum ggml_status
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                     ggml_tensor *tensor) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor, "\n");
    ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;

    if (tensor->view_src != NULL) {
@@ -381,7 +383,9 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                                ggml_tensor *tensor,
                                                const void *data, size_t offset,
                                                size_t size) try {

    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
    ggml_sycl_set_device(ctx->device);
    auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
@@ -407,7 +411,9 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                                const ggml_tensor *tensor,
                                                void *data, size_t offset,
                                                size_t size) try {

    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;

    ggml_sycl_set_device(ctx->device);
@@ -435,7 +441,12 @@ static bool
ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
                                    const ggml_tensor *src,
                                    ggml_tensor *dst) try {
    if (ggml_backend_buffer_is_sycl(src->buffer)) {
    bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": dst=", dst);
    debug_print_tensor(" src=", src);
    GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
    if (is_cpy_supported) {
        ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
        ggml_backend_sycl_buffer_context * dst_ctx = (ggml_backend_sycl_buffer_context *)dst->buffer->context;
@@ -492,7 +503,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,

static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer,
                                           uint8_t value) try {
    ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
    GGML_SYCL_DEBUG("[SYCL] call %s: size=%zu\n", __func__, buffer->size);
    ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;

    ggml_sycl_set_device(ctx->device);
    queue_ptr stream = ctx->stream;
@@ -511,7 +523,9 @@ catch (sycl::exception const &exc) {

static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
                                                   size_t offset, size_t size) {
    GGML_SYCL_DEBUG(" [SYCL] call %s\n", __func__);
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
    ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
    SYCL_CHECK(ggml_sycl_set_device(ctx->device));
    auto stream = &(dpct::dev_mgr::instance().get_device(ctx->device).default_queue());
@@ -789,6 +803,8 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff
static enum ggml_status
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
                                           ggml_tensor *tensor) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor, "\n");
    GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported

    ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
@@ -873,6 +889,9 @@ static void
ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
                                          ggml_tensor *tensor, const void *data,
                                          size_t offset, size_t size) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    // split tensors must always be set in their entirety at once
    GGML_ASSERT(offset == 0);
    GGML_ASSERT(size == ggml_nbytes(tensor));
@@ -926,6 +945,9 @@ static void
ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
                                          const ggml_tensor *tensor, void *data,
                                          size_t offset, size_t size) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    // split tensors must always be set in their entirety at once
    GGML_ASSERT(offset == 0);
    GGML_ASSERT(size == ggml_nbytes(tensor));
@@ -2015,12 +2037,12 @@ inline void ggml_sycl_op_mul_mat_sycl(
#else
    bool use_fp16 = false;
#endif
    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
        use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] &&
        dst->op_params[0] == GGML_PREC_DEFAULT) {
        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
    if ((src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && use_fp16 && ggml_is_contiguous(src0) &&
        row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT) {
        ggml_sycl_pool_alloc<sycl::half> src0_as_f16(ctx.pool());
        if (src0->type != GGML_TYPE_F16) {
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
                                                 " : converting src0 to fp16");
            const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type, dst);
            GGML_ASSERT(to_fp16_sycl != nullptr);
            size_t ne = row_diff*ne00;
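The conversion branches above wrap each to_fp16/to_fp32 helper in a scope_op_debug_print object labelled with a sub-op suffix such as "/to_fp16_sycl". The class itself is not shown in this diff; the following is only a sketch of how such a scoped logger might be put together (log on construction, log "done" on destruction), with both constructor shapes used in the hunks and building on the hypothetical helpers sketched near the top. Member layout and output format are assumptions.

// Sketch only; assumes ggml.h plus the GGML_SYCL_DEBUG / debug_print_tensor sketch above.
#include <string>

struct scope_op_debug_print {
    // Shape used by the thin op wrappers, e.g. scope_op_debug_print(__func__, dst, /*num_src=*/1);
    scope_op_debug_print(const char * func, const ggml_tensor * dst, int num_src, const char * suffix = "")
        : scope_op_debug_print(func, /*sub_op=*/"", dst, num_src, suffix) {}

    // Shape used for sub-steps, e.g. scope_op_debug_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2, ...);
    scope_op_debug_print(const char * func, const char * sub_op, const ggml_tensor * dst, int num_src,
                         const char * suffix = "")
        : name(std::string(func) + sub_op) {
        GGML_SYCL_DEBUG("[SYCL] call %s", name.c_str());
        debug_print_tensor(": dst=", dst);
        for (int i = 0; dst != nullptr && i < num_src; ++i) {
            debug_print_tensor(" src=", dst->src[i]);
        }
        GGML_SYCL_DEBUG("%s\n", suffix);
    }

    ~scope_op_debug_print() {
        // The closing print fires even on early return or a thrown exception.
        GGML_SYCL_DEBUG("[SYCL] call %s done\n", name.c_str());
    }

    std::string name;
};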
@@ -2033,6 +2055,8 @@ inline void ggml_sycl_op_mul_mat_sycl(

        ggml_sycl_pool_alloc<sycl::half> src1_as_f16(ctx.pool());
        if (src1->type != GGML_TYPE_F16) {
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_sycl", dst, /*num_src=*/2,
                                                 " : converting src1 to fp16");
            const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst);
            GGML_ASSERT(to_fp16_sycl != nullptr);
            size_t ne = src1_ncols*ne10;
@@ -2049,6 +2073,8 @@ inline void ggml_sycl_op_mul_mat_sycl(
            DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
                                      DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
                                      dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
                                                 " : converting dst to fp32");
            const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
            to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
        }
@@ -2064,21 +2090,25 @@ inline void ggml_sycl_op_mul_mat_sycl(
                src1_ptr, dpct::library_data_t::real_half, ne10, &beta_f16,
                dst_f16.get(), dpct::library_data_t::real_half, ldc,
                dpct::library_data_t::real_half)));
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
                                                 " : converting dst to fp32");
            const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
            to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff*src1_ncols, stream);
        }
    }
    else {
        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
    } else {
        ggml_sycl_pool_alloc<float> src0_ddq_as_f32(ctx.pool());
        ggml_sycl_pool_alloc<float> src1_ddq_as_f32(ctx.pool());
        if (src0->type != GGML_TYPE_F32) {
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
                                                 " : converting src0 to fp32");
            const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src0->type, dst);
            GGML_ASSERT(to_fp32_sycl != nullptr);
            src0_ddq_as_f32.alloc(row_diff*ne00);
            to_fp32_sycl(src0_dd_i, src0_ddq_as_f32.get(), row_diff*ne00, stream);
        }
        if (src1->type != GGML_TYPE_F32) {
            scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
                                                 " : converting src1 to fp32");
            const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(src1->type, dst);
            GGML_ASSERT(to_fp32_sycl != nullptr);
            src1_ddq_as_f32.alloc(src1_ncols*ne10);
@@ -2114,8 +2144,7 @@ catch (sycl::exception const &exc) {
    std::exit(1);
}

static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    dpct::queue_ptr main_stream = ctx.stream();
@@ -2167,8 +2196,7 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
    sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
}

inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    dpct::queue_ptr main_stream = ctx.stream();
@@ -2199,8 +2227,7 @@ inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor *
    argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream);
}

inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_I32);
@@ -2215,8 +2242,7 @@ inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *ds
    argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream);
}

inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) {

inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    dpct::queue_ptr main_stream = ctx.stream();
@@ -2233,8 +2259,7 @@ inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tens
    diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream);
}

inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {

inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    dpct::queue_ptr main_stream = ctx.stream();
@@ -2421,6 +2446,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
            dev[i].src1_ddq = dev[i].src1_ddq_alloc.alloc(ctx.pool(i), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);

            if (src1_on_device && src1_is_contiguous) {
                scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
                                                     /*num_src=*/2, " : converting src1 to Q8_1");
                quantize_row_q8_1_sycl(dev[i].src1_ddf, dev[i].src1_ddq, ne10, nrows1, src1_padded_col_size, stream);
                /*
                DPCT1010:90: SYCL uses exceptions to report errors and does not
@@ -2525,6 +2552,8 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
            }

            if (convert_src1_to_q8_1 && !src1_is_contiguous) {
                scope_op_debug_print scope_dbg_print(__func__, "/quantize_row_q8_1_sycl", dst,
                                                     /*num_src=*/2, " : converting src1 to Q8_1");
                quantize_row_q8_1_sycl(src1_ddf_i, src1_ddq_i, ne10, src1_ncols, src1_padded_col_size, stream);
                /*
                DPCT1010:92: SYCL uses exceptions to report errors and does
@@ -2619,33 +2648,28 @@ catch (sycl::exception const &exc) {


static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
    ggml_sycl_op_get_rows(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_norm(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_rms_norm(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_l2_norm(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    GGML_SYCL_DEBUG("call %s\n", __func__);
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_group_norm(ctx, dst);
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}

static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
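In the wrappers above, the old paired GGML_SYCL_DEBUG("call %s") / GGML_SYCL_DEBUG("call %s done") lines are replaced by a single scoped object, so the closing message is also emitted when a wrapper returns early or throws. A hypothetical wrapper using the sketch from earlier would look like this; it illustrates the pattern only and is not a function from the file.

// Illustration only, reusing the scope_op_debug_print sketch above.
static void example_sycl_unary_op(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);  // logs on entry
    (void) ctx;  // a real wrapper would use the context to launch the SYCL kernel here
}                                                                        // logs "done" on scope exit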
@@ -2773,6 +2797,8 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons

    // convert src1 to fp16
    if (src1->type != GGML_TYPE_F16) {
        scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_nc_sycl", dst, /*num_src=*/2,
                                             " : converting src1 to fp16");
        const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type);
        GGML_ASSERT(to_fp16_nc_sycl != nullptr);
        const int64_t ne_src1 = ggml_nelements(src1);
@@ -3076,6 +3102,7 @@ static bool can_use_mul_mat_vec_q(const ggml_tensor * src0, const ggml_tensor *
}

static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
    const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
    int64_t min_compute_capability = INT_MAX;
@@ -3153,7 +3180,6 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
        constexpr bool convert_src1_to_q8_1 = false;
        ggml_sycl_op_mul_mat(ctx, src0, src1, dst, ggml_sycl_op_mul_mat_sycl, convert_src1_to_q8_1);
    }
    GGML_SYCL_DEBUG("call %s done\n", __func__);
}
@@ -3224,6 +3250,7 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(

static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
                                 ggml_tensor *dst) try {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/3);
    const ggml_tensor *src0 = dst->src[0];
    const ggml_tensor *src1 = dst->src[1];
    GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
@@ -3392,37 +3419,45 @@ catch (sycl::exception const &exc) {
}

static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_scale(ctx, dst);
}

static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_diag_mask_inf(ctx, dst);
}

static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    ggml_sycl_op_pool2d(ctx, dst);
}

static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
    ggml_sycl_op_im2col(ctx, dst);
}

static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_sum(ctx, dst);
}

static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_sum_rows(ctx, dst);
}

static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_argsort(ctx, dst);
}

static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
    ggml_sycl_op_argmax(ctx, dst);
}
@@ -3716,6 +3751,9 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
                                               ggml_tensor *tensor,
                                               const void *data, size_t offset,
                                               size_t size) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@@ -3734,6 +3772,9 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
                                               const ggml_tensor *tensor,
                                               void *data, size_t offset,
                                               size_t size) try {
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": tensor=", tensor);
    GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@@ -3752,7 +3793,13 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
                                               const ggml_tensor *src,
                                               ggml_tensor *dst) try {
    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
    if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) {
    bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
                            ggml_backend_buffer_is_sycl(src->buffer);
    GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
    debug_print_tensor(": dst=", dst);
    debug_print_tensor(" src=", src);
    GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
    if (is_cpy_supported) {
        /*
        DPCT1009:215: SYCL uses exceptions to report errors and does not use the
        error codes. The original code was commented out and a warning string
@@ -3773,6 +3820,7 @@ catch (sycl::exception const &exc) {
}

static void ggml_backend_sycl_synchronize(ggml_backend_t backend) try {
    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
    const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
    SYCL_CHECK(CHECK_TRY_ERROR((stream)->wait()));
@@ -3906,7 +3954,7 @@ catch (sycl::exception const &exc)
}

static void ggml_backend_sycl_event_wait(ggml_backend_t backend, ggml_backend_event_t event) try {

    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);
    sycl::event* sycl_event = static_cast<sycl::event*>(event->context);

    if (ggml_backend_is_sycl(backend)) {
@@ -4301,6 +4349,7 @@ static void ggml_backend_sycl_device_event_free(ggml_backend_dev_t dev, ggml_bac

static void ggml_backend_sycl_device_event_synchronize(ggml_backend_dev_t dev, ggml_backend_event_t event) try {
    GGML_UNUSED(dev);
    GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);

    sycl::event *sycl_event = static_cast<sycl::event *>(event->context);
    SYCL_CHECK(CHECK_TRY_ERROR(sycl_event->wait()));