whisper : remove speed_up and phase_vocoder* functions (#2198)

* whisper : fix cast warning
* whisper : remove phase_vocoder functions, ref #2195
* whisper : remove speed_up from whisper_full_params, closes #2195
2025-06-14 12:58:10 +00:00 · 2024-05-31 11:37:29 +03:00
parent b87494bb8f
commit af5833e298
20 changed files with 14 additions and 161 deletions
--- a/examples/lsp/lsp.cpp
+++ b/examples/lsp/lsp.cpp
@@ -26,7 +26,6 @@ struct whisper_params {
    float vad_thold    = 0.6f;
    float freq_thold   = 100.0f;

-    bool speed_up      = false;
    bool translate     = false;
    bool print_special = false;
    bool print_energy  = false;
@@ -70,7 +69,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
        else if (arg == "-ac"  || arg == "--audio-ctx")     { params.audio_ctx     = std::stoi(argv[++i]); }
        else if (arg == "-vth" || arg == "--vad-thold")     { params.vad_thold     = std::stof(argv[++i]); }
        else if (arg == "-fth" || arg == "--freq-thold")    { params.freq_thold    = std::stof(argv[++i]); }
-        else if (arg == "-su"  || arg == "--speed-up")      { params.speed_up      = true; }
        else if (arg == "-tr"  || arg == "--translate")     { params.translate     = true; }
        else if (arg == "-ps"  || arg == "--print-special") { params.print_special = true; }
        else if (arg == "-pe"  || arg == "--print-energy")  { params.print_energy  = true; }
@@ -102,7 +100,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
    fprintf(stderr, "  -ac N,      --audio-ctx N    [%-7d] audio context size (0 - all)\n",                params.audio_ctx);
    fprintf(stderr, "  -vth N,     --vad-thold N    [%-7.2f] voice activity detection threshold\n",        params.vad_thold);
    fprintf(stderr, "  -fth N,     --freq-thold N   [%-7.2f] high-pass frequency cutoff\n",                params.freq_thold);
-    fprintf(stderr, "  -su,        --speed-up       [%-7s] speed up audio by x2 (reduced accuracy)\n",     params.speed_up ? "true" : "false");
    fprintf(stderr, "  -tr,        --translate      [%-7s] translate from source language to english\n",   params.translate ? "true" : "false");
    fprintf(stderr, "  -ps,        --print-special  [%-7s] print special tokens\n",                        params.print_special ? "true" : "false");
    fprintf(stderr, "  -pe,        --print-energy   [%-7s] print sound energy (for debugging)\n",          params.print_energy ? "true" : "false");
@@ -184,7 +181,6 @@ json unguided_transcription(struct whisper_context * ctx, audio_async &audio, js
    wparams.n_threads        = params.n_threads;

    wparams.audio_ctx        = params.audio_ctx;
-    wparams.speed_up         = params.speed_up;
    wparams.suppress_non_speech_tokens = true;
    // run the transformer and a single decoding pass
    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
@@ -223,7 +219,6 @@ json guided_transcription(struct whisper_context * ctx, audio_async &audio, cons
    wparams.n_threads        = params.n_threads;

    wparams.audio_ctx        = params.audio_ctx;
-    wparams.speed_up         = params.speed_up;

    // TODO: Do some time testing. Does an overly long prompt slow down processing?
    // Set up command sets/precompute prompts