From f25edade2b84e06e65d6b82d3ab0170946c90ca5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 12 Feb 2024 16:32:26 +0200 Subject: [PATCH] whisper : alternative way to handle the external encoders --- whisper.cpp | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index d8d3b84c..a1472834 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -1659,19 +1659,16 @@ static struct ggml_cgraph * whisper_build_graph_conv( ggml_set_name(cur, "embd_conv"); wstate.embd_conv = cur; } else { - // keep the "mel" tensor alive - we will use it to store the input data for the external encoders - // TODO: is there a better way to do this - mel = ggml_scale(ctx0, mel, 1.0f); - ggml_build_forward_expand(gf, mel); - - cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); + // transform the "mel" tensor to "embd_enc" via a sequence of ggml ops + // these are not actually executed when using an external encoder + // necessary only to prepare tensors with the appropriate memory sizes + cur = ggml_conv_1d_ph(ctx0, model.e_conv_1_w, mel, 1, 1); // (conv) + cur = ggml_conv_1d_ph(ctx0, model.e_conv_2_w, cur, 2, 1); // (conv) + cur = ggml_add(ctx0, model.e_pe, ggml_cont(ctx0, ggml_transpose(ctx0, cur))); // (cross) ggml_set_name(cur, "embd_enc"); ggml_set_output(cur); - wstate.embd_enc = cur; - // TODO: without this op, the "embd_enc" tensor ends up being not allocated - // is there a better fix?
- cur = ggml_scale(ctx0, cur, 1.0f); + wstate.embd_enc = cur; } ggml_build_forward_expand(gf, cur); @@ -1702,14 +1699,6 @@ static struct ggml_cgraph * whisper_build_graph_encoder( ggml_cgraph * gf = ggml_new_graph_custom(ctx0, WHISPER_MAX_NODES, false); - //ggml_allocr * alloc = wstate.alloc_encode.alloc; - - //struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_ctx, n_state); - //ggml_allocr_alloc(alloc, cur); - - //if (!ggml_allocr_is_measure(alloc)) { - // ggml_backend_tensor_copy(wstate.embd_conv, cur); - //} struct ggml_tensor * cur = ggml_view_tensor(ctx0, wstate.embd_conv); const float KQscale = 1.0f/sqrtf(float(n_state)/n_head); @@ -1951,14 +1940,6 @@ static struct ggml_cgraph * whisper_build_graph_cross( ggml_cgraph * gf = ggml_new_graph(ctx0); - //ggml_allocr * alloc = wstate.alloc_cross.alloc; - - //struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx); - //ggml_allocr_alloc(alloc, cur); - - //if (!ggml_allocr_is_measure(alloc)) { - // ggml_backend_tensor_copy(wstate.embd_enc, cur); - //} struct ggml_tensor * cur = ggml_view_tensor(ctx0, wstate.embd_enc); const float Kscale = pow(float(n_state) / n_head, -0.25);