From 6a7c82501e3794724ba80bfb9a983810af036803 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 16 Dec 2022 18:31:17 +0200
Subject: [PATCH] whisper : improve decoding strategy (#244)

- Clear past prompt when there is very short audio left for processing.
  My observation is that in these cases the decoding tends to repeat and
  hallucinate stuff and I think this is induced by the existing prompt
- When we fail to sample timestamp token, retry by clearing the past
  prompt. If it fails again, then we advance the window by 1 second
---
 whisper.cpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/whisper.cpp b/whisper.cpp
index 1bc79967..da35456a 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2650,10 +2650,17 @@ int whisper_full(
             }
         }
 
+        // if only 1 second left, then stop
         if (seek + 100 >= seek_end) {
             break;
         }
 
+        // if there is a very short audio segment left to process, we remove any past prompt since it tends
+        // to confuse the decoder and often make it repeat or hallucinate stuff
+        if (seek > seek_start && seek + 500 >= seek_end) {
+            prompt_past.clear();
+        }
+
         if (params.encoder_begin_callback) {
             if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
                 fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
@@ -2780,8 +2787,14 @@ int whisper_full(
         }
 
         if (failed) {
-            fprintf(stderr, "\n%s: failed to generate timestamp token - using fallback strategy\n\n", __func__);
-            seek += 100;
+            // when we fail to sample timestamp token, retry by clearing the past prompt
+            // if it fails again, then we advance the window by 1 second
+            if (prompt_past.size() > 0) {
+                prompt_past.clear();
+            } else {
+                fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
+                seek += 100;
+            }
             continue;
         }