mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-18 02:39:47 +00:00
ggml : fix cont with transposed tensors when one dimension is 1 (ggml/934)
* ggml_cont: fix issue with transposed tensors when one dimension is 1. When using multiple threads, it is not enough to check that the tensors are contiguous for ggml_compute_forward_dup_same_cont to work correctly; the tensors' strides also need to match. Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com> * Add ggml_cont tests. Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com> * Remove dead code: it isn't possible to reach this code because all these functions are invoked by ggml_compute_forward_dup if and only if src0->type != dst->type. Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com> * Make ggml_compute_forward_dup_same_cont work with contiguous tensors. Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com> --------- Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
c96906d84d
commit
4a4a52bf98
@ -8120,8 +8120,7 @@ static void ggml_compute_forward_dup_same_cont(
|
|||||||
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
||||||
GGML_ASSERT(src0->type == dst->type);
|
GGML_ASSERT(src0->type == dst->type);
|
||||||
|
|
||||||
const size_t nb00 = src0->nb[0];
|
const size_t nb0 = ggml_type_size(src0->type);
|
||||||
const size_t nb0 = dst->nb[0];
|
|
||||||
|
|
||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
@ -8135,8 +8134,8 @@ static void ggml_compute_forward_dup_same_cont(
|
|||||||
if (ie0 < ie1) {
|
if (ie0 < ie1) {
|
||||||
memcpy(
|
memcpy(
|
||||||
((char *) dst->data + ie0*nb0),
|
((char *) dst->data + ie0*nb0),
|
||||||
((char *) src0->data + ie0*nb00),
|
((char *) src0->data + ie0*nb0),
|
||||||
(ie1 - ie0) * ggml_type_size(src0->type));
|
(ie1 - ie0) * nb0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -8153,11 +8152,6 @@ static void ggml_compute_forward_dup_f16(
|
|||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
|
||||||
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
|
||||||
ggml_compute_forward_dup_same_cont(params, dst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// parallelize by rows
|
// parallelize by rows
|
||||||
const int nr = ne01;
|
const int nr = ne01;
|
||||||
// number of rows per thread
|
// number of rows per thread
|
||||||
@ -8422,11 +8416,6 @@ static void ggml_compute_forward_dup_bf16(
|
|||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
|
||||||
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
|
||||||
ggml_compute_forward_dup_same_cont(params, dst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// parallelize by rows
|
// parallelize by rows
|
||||||
const int nr = ne01;
|
const int nr = ne01;
|
||||||
// number of rows per thread
|
// number of rows per thread
|
||||||
@ -8778,11 +8767,6 @@ static void ggml_compute_forward_dup_f32(
|
|||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
|
||||||
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
|
|
||||||
ggml_compute_forward_dup_same_cont(params, dst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// parallelize by rows
|
// parallelize by rows
|
||||||
const int nr = ne01;
|
const int nr = ne01;
|
||||||
// number of rows per thread
|
// number of rows per thread
|
||||||
@ -9092,13 +9076,13 @@ static void ggml_compute_forward_dup_bytes(
|
|||||||
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
||||||
GGML_ASSERT(src0->type == dst->type);
|
GGML_ASSERT(src0->type == dst->type);
|
||||||
|
|
||||||
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
||||||
|
|
||||||
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
|
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
|
||||||
ggml_compute_forward_dup_same_cont(params, dst);
|
ggml_compute_forward_dup_same_cont(params, dst);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_TENSOR_UNARY_OP_LOCALS;
|
|
||||||
|
|
||||||
const size_t type_size = ggml_type_size(src0->type);
|
const size_t type_size = ggml_type_size(src0->type);
|
||||||
const int ith = params->ith; // thread index
|
const int ith = params->ith; // thread index
|
||||||
const int nth = params->nth; // number of threads
|
const int nth = params->nth; // number of threads
|
||||||
|
Loading…
Reference in New Issue
Block a user