Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-06-23 00:48:59 +00:00)
sycl: Remove unneeded f16->f32 copy for dnnl mul mat (llama/14125)
This commit is contained in:
Committed by: Georgi Gerganov
Parent: a96a880f7b
Commit: 0097eaf839
@ -65,6 +65,9 @@ public:
|
|||||||
|
|
||||||
dnnl::primitive_attr primitive_attr;
|
dnnl::primitive_attr primitive_attr;
|
||||||
primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
|
primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
|
||||||
|
#ifdef GGML_SYCL_F16
|
||||||
|
primitive_attr.set_fpmath_mode(dnnl::fpmath_mode::f16);
|
||||||
|
#endif
|
||||||
|
|
||||||
auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
|
auto a_mem = dnnl::memory(a_in_md, eng, const_cast<void*>(a));
|
||||||
auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
|
auto b_mem = dnnl::memory(b_in_md, eng, const_cast<void*>(b));
|
||||||
|
@ -2127,21 +2127,18 @@ inline void ggml_sycl_op_mul_mat_sycl(
|
|||||||
const sycl::half *src1_ptr = src1->type == GGML_TYPE_F16
|
const sycl::half *src1_ptr = src1->type == GGML_TYPE_F16
|
||||||
? (const sycl::half *)src1->data + src1_padded_row_size
|
? (const sycl::half *)src1->data + src1_padded_row_size
|
||||||
: src1_as_f16.get();
|
: src1_as_f16.get();
|
||||||
ggml_sycl_pool_alloc<sycl::half> dst_f16(ctx.pool(), row_diff * src1_ncols);
|
|
||||||
|
|
||||||
#if GGML_SYCL_DNNL
|
#if GGML_SYCL_DNNL
|
||||||
if (!g_ggml_sycl_disable_dnn) {
|
if (!g_ggml_sycl_disable_dnn) {
|
||||||
DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
|
DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr,
|
||||||
DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
|
DnnlGemmWrapper::to_dt<sycl::half>(), src0_ptr, DnnlGemmWrapper::to_dt<sycl::half>(),
|
||||||
dst_f16.get(), DnnlGemmWrapper::to_dt<sycl::half>(), stream);
|
dst_dd_i, DnnlGemmWrapper::to_dt<float>(), stream);
|
||||||
scope_op_debug_print scope_dbg_print(__func__, "/to_fp32_sycl", dst, /*num_src=*/2,
|
|
||||||
" : converting dst to fp32");
|
|
||||||
const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16, dst);
|
|
||||||
to_fp32_sycl(dst_f16.get(), dst_dd_i, row_diff* src1_ncols, stream);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
ggml_sycl_pool_alloc<sycl::half> dst_f16(ctx.pool(), row_diff * src1_ncols);
|
||||||
|
|
||||||
const sycl::half alpha_f16 = 1.0f;
|
const sycl::half alpha_f16 = 1.0f;
|
||||||
const sycl::half beta_f16 = 0.0f;
|
const sycl::half beta_f16 = 0.0f;
|
||||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
|
SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm(
|
||||||
|
Reference in New Issue
Block a user