whisper : restore decoder temperature fallbacks

I disabled this because there were many complaints about slow decoding. The current implementation does not allow batching the decoders when using the "best of" or "beam size" parameters, so the decoding time is proportional to the number of decoders, which is obviously not great. However, there are now even more complaints about wrong decodings and repetition. So, as a compromise, the fallbacks are re-enabled, but the defaults are reduced to just 2 "best of" / "beam size" decoders. Also, the temperature step is increased from 0.2 to 0.4 - i.e. from a maximum of 5 fallbacks to a maximum of 2. Also, the stream example now has fallbacks enabled by default. close #471 #477 #508 #612 #719 #731
2025-06-15 21:38:07 +00:00 · 2023-04-15 16:04:07 +03:00
parent ea1f8a50d4
commit f19e23fbd1
3 changed files with 25 additions and 21 deletions
--- a/whisper.cpp
+++ b/whisper.cpp
@ -3220,7 +3220,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
        /*.max_initial_ts   =*/  1.0f,
        /*.length_penalty   =*/ -1.0f,

-        /*.temperature_inc  =*/  0.0f, // TODO: temporary disabled until improve performance
+        /*.temperature_inc  =*/  0.4f,
        /*.entropy_thold    =*/  2.4f,
        /*.logprob_thold    =*/ -1.0f,
        /*.no_speech_thold  =*/  0.6f,
@ -3252,13 +3252,13 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
        case WHISPER_SAMPLING_GREEDY:
            {
                result.greedy = {
-                    /*.best_of   =*/ 1,
+                    /*.best_of   =*/ 2, // TODO: increase to 5 when we speed-up batch decoding
                };
            } break;
        case WHISPER_SAMPLING_BEAM_SEARCH:
            {
                result.beam_search = {
-                    /*.beam_size =*/ 5,
+                    /*.beam_size =*/ 2, // TODO: increase to 5 when we speed-up batch decoding

                    /*.patience  =*/ -1.0f,
                };