mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-22 16:38:58 +00:00
SYCL: Implement few same quantized type copy kernels (llama/13739)
* SYCL: Implement few same quantized type copy kernels * Use memcpy for copying contiguous tensors ggml-ci * feat(sycl): add contiguous tensor copy support and device checks Adds a memcpy path for contiguous tensors of the same type to optimize data transfer. Updates device support checks to recognize contiguous tensor operations, improving compatibility and performance. * refactor: replace specific block copy functions with template The changes replace multiple redundant block copy functions (e.g., cpy_block_q8_0_q8_0, cpy_block_q5_0_q5_0) with a single templated function cpy_blck_q_q. This reduces code duplication by using a generic template that works for any block type, improving maintainability while preserving the same functionality. The template is instantiated with specific block types (e.g., block_q8_0) where needed. * Exclude BF16 support for COPY tensors for now ggml-ci * perf: adjust SYCL copy kernel block sizes for efficiency Use ceil_div to ensure full element coverage and update nd_range parameters to better align with SYCL block sizes, improving parallelism and device utilization in copy operations.
This commit is contained in:
committed by
Georgi Gerganov
parent
f0f5a9f7fb
commit
489dc158a6
@ -4226,6 +4226,9 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
{
|
||||
ggml_type src0_type = op->src[0]->type;
|
||||
ggml_type src1_type = op->src[1]->type;
|
||||
if (src0_type == src1_type && (ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1])) && src0_type != GGML_TYPE_BF16) {
|
||||
return true;
|
||||
}
|
||||
if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
|
||||
return true;
|
||||
}
|
||||
@ -4271,6 +4274,21 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_IQ4_NL) {
|
||||
return true;
|
||||
}
|
||||
if(src0_type == GGML_TYPE_Q8_0 && src1_type == GGML_TYPE_Q8_0) {
|
||||
return true;
|
||||
}
|
||||
if(src0_type == GGML_TYPE_Q5_0 && src1_type == GGML_TYPE_Q5_0) {
|
||||
return true;
|
||||
}
|
||||
if(src0_type == GGML_TYPE_Q5_1 && src1_type == GGML_TYPE_Q5_1) {
|
||||
return true;
|
||||
}
|
||||
if(src0_type == GGML_TYPE_Q4_0 && src1_type == GGML_TYPE_Q4_0) {
|
||||
return true;
|
||||
}
|
||||
if(src0_type == GGML_TYPE_Q4_1 && src1_type == GGML_TYPE_Q4_1) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case GGML_OP_CONCAT:
|
||||
|
Reference in New Issue
Block a user