mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-15 21:38:07 +00:00
whisper : restore decoder temperature fallbacks
I disabled this because there were many complaints about slow decoding. The current implementation does not allow batching the decoders when using the "best of" or "beam size" parameters, so the decoding time is proportional to the number of decoders, which is obviously not great. However, there are now even more complaints about wrong decodings and repetition. So, as a compromise, the fallbacks are re-enabled, but the defaults are reduced to just 2 "best of" / "beam size" decoders. Also, the temperature step is increased from 0.2 to 0.4, i.e. from a maximum of 5 fallbacks to a maximum of 2. Finally, the stream example now has fallbacks enabled by default. Closes #471 #477 #508 #612 #719 #731
This commit is contained in:
@ -3220,7 +3220,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
|
||||
/*.max_initial_ts =*/ 1.0f,
|
||||
/*.length_penalty =*/ -1.0f,
|
||||
|
||||
/*.temperature_inc =*/ 0.0f, // TODO: temporary disabled until improve performance
|
||||
/*.temperature_inc =*/ 0.4f,
|
||||
/*.entropy_thold =*/ 2.4f,
|
||||
/*.logprob_thold =*/ -1.0f,
|
||||
/*.no_speech_thold =*/ 0.6f,
|
||||
@ -3252,13 +3252,13 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
|
||||
case WHISPER_SAMPLING_GREEDY:
|
||||
{
|
||||
result.greedy = {
|
||||
/*.best_of =*/ 1,
|
||||
/*.best_of =*/ 2, // TODO: increase to 5 when we speed-up batch decoding
|
||||
};
|
||||
} break;
|
||||
case WHISPER_SAMPLING_BEAM_SEARCH:
|
||||
{
|
||||
result.beam_search = {
|
||||
/*.beam_size =*/ 5,
|
||||
/*.beam_size =*/ 2, // TODO: increase to 5 when we speed-up batch decoding
|
||||
|
||||
/*.patience =*/ -1.0f,
|
||||
};
|
||||
|
Reference in New Issue
Block a user