mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-18 20:27:53 +00:00
whisper : add single-timestamp logic (#2629)
* Fix hallucinations during silence. When the predicted tokens end with a single timestamp, the entire 30-second segment should be considered done, to avoid hallucinations for the remaining part of the segment. This behaviour is on par with OpenAI's whisper. Refer to the logic related to `single_timestamp_ending` in https://github.com/openai/whisper/blob/main/whisper/transcribe.py * Accept review comments related to formatting. Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
09a1b61218
commit
2f2841bfce
@ -6060,7 +6060,7 @@ int whisper_full_with_state(
|
||||
{
|
||||
const auto & best_decoder = state->decoders[best_decoder_id];
|
||||
|
||||
const auto seek_delta = best_decoder.seek_delta;
|
||||
auto seek_delta = best_decoder.seek_delta;
|
||||
const auto result_len = best_decoder.sequence.result_len;
|
||||
|
||||
const auto & tokens_cur = best_decoder.sequence.tokens;
|
||||
@ -6201,6 +6201,15 @@ int whisper_full_with_state(
|
||||
}
|
||||
}
|
||||
|
||||
// ref: https://github.com/ggerganov/whisper.cpp/pull/2629
|
||||
const bool single_timestamp_ending = tokens_cur.size() > 1 &&
|
||||
tokens_cur[tokens_cur.size() - 2].id < whisper_token_beg(ctx) &&
|
||||
tokens_cur[tokens_cur.size() - 1].id > whisper_token_beg(ctx);
|
||||
if (single_timestamp_ending) {
|
||||
WHISPER_LOG_DEBUG("single timestamp ending - skip entire chunk\n");
|
||||
seek_delta = std::min(seek_end - seek, WHISPER_CHUNK_SIZE * 100);
|
||||
}
|
||||
|
||||
// update audio window
|
||||
seek += seek_delta;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user