mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-17 14:28:07 +00:00
llava : add MobileVLM support (llama/5132)
* New Feature: 1. Sum_Rows: fix cuda kernel overflow fix block shape error when nrows too big 2. Im2Col: Support Batch in cuda Support f32 to f32 both in cpu && cuda 3. DepthWiseConv: Support by Im2Col && MulMat 4. Pool_2d: Supoort avg pooling in cuda 5. HardSigmoid: Imp in cuda 6. HardSwish: Imp in cuda * fix tabs instead of spaces * code clean * CUDA POOL2D * ADD POOL2D test case in test-backend-ops.cpp * code clean * fix pool2d_kernel nits * fix bug in pool2d kernel * fix avg pooling, count_include_pad nits * test-backend-ops : add more pool_2d tests * cuda : fix warnings and formatting * ggml : check types in release builds too in pool_2d * test-backend-ops : remove f16 pool_2d tests * cuda : more style fixes * Add assert in ggml_cuda_op_pool2d * pool2d float padding fallback * test-backend-ops : add dst_type to im2col --------- Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
committed by
Georgi Gerganov
parent
fc7b0e2c28
commit
12c462d656
118
ggml.c
118
ggml.c
@ -5349,7 +5349,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
||||
int s0,
|
||||
int p0,
|
||||
int d0) {
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
|
||||
|
||||
struct ggml_tensor * result =
|
||||
ggml_mul_mat(ctx,
|
||||
@ -5427,16 +5427,15 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
|
||||
int p1,
|
||||
int d0,
|
||||
int d1) {
|
||||
|
||||
struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
|
||||
ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
||||
s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
|
||||
|
||||
struct ggml_tensor * result =
|
||||
ggml_mul_mat(ctx,
|
||||
ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
||||
ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
||||
s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
||||
struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
||||
|
||||
new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
||||
struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
|
||||
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
||||
|
||||
return result;
|
||||
@ -5457,7 +5456,8 @@ struct ggml_tensor * ggml_im2col(
|
||||
int p1,
|
||||
int d0,
|
||||
int d1,
|
||||
bool is_2D) {
|
||||
bool is_2D,
|
||||
enum ggml_type dst_type) {
|
||||
|
||||
if(is_2D) {
|
||||
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
||||
@ -5481,7 +5481,7 @@ struct ggml_tensor * ggml_im2col(
|
||||
is_2D ? b->ne[3] : 1,
|
||||
};
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
|
||||
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
||||
ggml_set_op_params(result, params, sizeof(params));
|
||||
|
||||
@ -5506,7 +5506,7 @@ struct ggml_tensor * ggml_conv_2d(
|
||||
int p1,
|
||||
int d0,
|
||||
int d1) {
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
|
||||
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]
|
||||
|
||||
struct ggml_tensor * result =
|
||||
ggml_mul_mat(ctx,
|
||||
@ -5632,12 +5632,13 @@ struct ggml_tensor * ggml_pool_2d(
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor * result;
|
||||
const int64_t ne[3] = {
|
||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
|
||||
a->ne[2],
|
||||
};
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||
result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||
|
||||
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
||||
ggml_set_op_params(result, params, sizeof(params));
|
||||
@ -5645,7 +5646,6 @@ struct ggml_tensor * ggml_pool_2d(
|
||||
result->op = GGML_OP_POOL_2D;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -12493,6 +12493,92 @@ static void ggml_compute_forward_conv_transpose_1d(
|
||||
}
|
||||
}
|
||||
|
||||
// src0: kernel [OC, IC, KH, KW]
|
||||
// src1: image [N, IC, IH, IW]
|
||||
// dst: result [N, OH, OW, IC*KH*KW]
|
||||
static void ggml_compute_forward_im2col_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src0,
|
||||
const struct ggml_tensor * src1,
|
||||
struct ggml_tensor * dst) {
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
|
||||
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
|
||||
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
|
||||
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
|
||||
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
|
||||
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
|
||||
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
|
||||
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
const int64_t N = is_2D ? ne13 : ne12;
|
||||
const int64_t IC = is_2D ? ne12 : ne11;
|
||||
const int64_t IH = is_2D ? ne11 : 1;
|
||||
const int64_t IW = ne10;
|
||||
|
||||
const int64_t KH = is_2D ? ne01 : 1;
|
||||
const int64_t KW = ne00;
|
||||
|
||||
const int64_t OH = is_2D ? ne2 : 1;
|
||||
const int64_t OW = ne1;
|
||||
|
||||
int ofs0 = is_2D ? nb13 : nb12;
|
||||
int ofs1 = is_2D ? nb12 : nb11;
|
||||
|
||||
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
||||
GGML_ASSERT(nb10 == sizeof(float));
|
||||
|
||||
if (params->type == GGML_TASK_INIT) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
||||
{
|
||||
float * const wdata = (float *) dst->data;
|
||||
|
||||
for (int64_t in = 0; in < N; in++) {
|
||||
for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
|
||||
for (int64_t iow = 0; iow < OW; iow++) {
|
||||
for (int64_t iic = ith; iic < IC; iic += nth) {
|
||||
|
||||
// micro kernel
|
||||
float * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
||||
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
|
||||
|
||||
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
|
||||
for (int64_t ikw = 0; ikw < KW; ikw++) {
|
||||
const int64_t iiw = iow*s0 + ikw*d0 - p0;
|
||||
const int64_t iih = ioh*s1 + ikh*d1 - p1;
|
||||
|
||||
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
|
||||
} else {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = (src_data[iih*IW + iiw]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// src0: kernel [OC, IC, KH, KW]
|
||||
// src1: image [N, IC, IH, IW]
|
||||
// dst: result [N, OH, OW, IC*KH*KW]
|
||||
@ -12583,14 +12669,14 @@ static void ggml_compute_forward_im2col(
|
||||
const struct ggml_tensor * src0,
|
||||
const struct ggml_tensor * src1,
|
||||
struct ggml_tensor * dst) {
|
||||
switch (src0->type) {
|
||||
switch (dst->type) {
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
ggml_compute_forward_im2col_f16(params, src0, src1, dst);
|
||||
} break;
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
ggml_compute_forward_im2col_f32(params, src0, src1, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
@ -12781,8 +12867,8 @@ static void ggml_compute_forward_pool_2d(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src,
|
||||
struct ggml_tensor * dst) {
|
||||
assert(src->type == GGML_TYPE_F32);
|
||||
assert(params->ith == 0);
|
||||
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
|
Reference in New Issue
Block a user