From b5bb5c85d4ce050aa6970310b594ab5e7a1a4fab Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 27 Aug 2023 20:02:57 +0300 Subject: [PATCH] whisper : allow whisper_full from mel spectrogram - no audio (#1214) Co-authored-by: jbrough --- whisper.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 9cdb2714..e7f760b6 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -3140,7 +3140,6 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i return false; } - if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) { log("%s: failed to eval\n", __func__); return 1; @@ -3374,7 +3373,6 @@ float * whisper_get_logits(struct whisper_context * ctx) { return ctx->state->logits.data(); } - float * whisper_get_logits_from_state(struct whisper_state * state) { return state->logits.data(); } @@ -4087,15 +4085,17 @@ int whisper_full_with_state( result_all.clear(); - // compute log mel spectrogram - if (params.speed_up) { - // TODO: Replace PV with more advanced algorithm - log("%s: failed to compute log mel spectrogram\n", __func__); - return -1; - } else { - if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) { + if (n_samples > 0) { + // compute log mel spectrogram + if (params.speed_up) { + // TODO: Replace PV with more advanced algorithm log("%s: failed to compute log mel spectrogram\n", __func__); - return -2; + return -1; + } else { + if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) { + log("%s: failed to compute log mel spectrogram\n", __func__); + return -2; + } } } @@ -4121,7 +4121,9 @@ int whisper_full_with_state( state->t_beg = 0; state->t_last = 0; state->tid_last = 0; - state->energy = get_signal_energy(samples, n_samples, 32); + if (n_samples > 0) { + state->energy = get_signal_energy(samples, n_samples, 32); + } } const int seek_start = params.offset_ms/10; @@ -4258,7 +4260,7 @@ int whisper_full_with_state( while (true) { if (params.progress_callback) { const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start); - + params.progress_callback( ctx, ctx->state, progress_cur, params.progress_callback_user_data); } @@ -4813,7 +4815,6 @@ int whisper_full_with_state( return 0; } - int whisper_full( struct whisper_context * ctx, struct whisper_full_params params, @@ -4890,7 +4891,6 @@ int whisper_full_parallel( result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t; result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t; - // make sure that segments are not overlapping if (!ctx->state->result_all.empty()) { result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);