mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-18 18:56:35 +00:00
stream : add "max_tokens" parameter
Used to limit the number of tokens in a segment. Useful for combating word repetition when using a partial encoder context.
This commit is contained in:
parent
d351771a4b
commit
62b5ff875c
@ -322,6 +322,7 @@ int main(int argc, char ** argv) {
|
|||||||
{
|
{
|
||||||
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
||||||
|
|
||||||
|
wparams.max_tokens = 32;
|
||||||
wparams.print_progress = false;
|
wparams.print_progress = false;
|
||||||
wparams.print_special_tokens = params.print_special_tokens;
|
wparams.print_special_tokens = params.print_special_tokens;
|
||||||
wparams.print_realtime = false;
|
wparams.print_realtime = false;
|
||||||
|
@ -2402,6 +2402,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
|
|||||||
/*.thold_pt =*/ 0.01f,
|
/*.thold_pt =*/ 0.01f,
|
||||||
/*.thold_ptsum =*/ 0.01f,
|
/*.thold_ptsum =*/ 0.01f,
|
||||||
/*.max_len =*/ 0,
|
/*.max_len =*/ 0,
|
||||||
|
/*.max_tokens =*/ 0,
|
||||||
|
|
||||||
/*.speed_up =*/ false,
|
/*.speed_up =*/ false,
|
||||||
|
|
||||||
@ -2443,6 +2444,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
|
|||||||
/*.thold_pt =*/ 0.01f,
|
/*.thold_pt =*/ 0.01f,
|
||||||
/*.thold_ptsum =*/ 0.01f,
|
/*.thold_ptsum =*/ 0.01f,
|
||||||
/*.max_len =*/ 0,
|
/*.max_len =*/ 0,
|
||||||
|
/*.max_tokens =*/ 0,
|
||||||
|
|
||||||
/*.speed_up =*/ false,
|
/*.speed_up =*/ false,
|
||||||
|
|
||||||
@ -2685,7 +2687,7 @@ int whisper_full(
|
|||||||
//}
|
//}
|
||||||
|
|
||||||
// end of text token
|
// end of text token
|
||||||
if (token.id == whisper_token_eot(ctx) || (i > WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT)) {
|
if (token.id == whisper_token_eot(ctx) || (params.max_tokens > 0 && i > params.max_tokens)) {
|
||||||
if (result_len == 0) {
|
if (result_len == 0) {
|
||||||
if (seek + seek_delta + 100 >= seek_end) {
|
if (seek + seek_delta + 100 >= seek_end) {
|
||||||
result_len = i + 1;
|
result_len = i + 1;
|
||||||
|
@ -25,7 +25,6 @@
|
|||||||
#define WHISPER_CHUNK_SIZE 30
|
#define WHISPER_CHUNK_SIZE 30
|
||||||
|
|
||||||
#define WHISPER_EXPERIMENT_AUDIO_CTX 512
|
#define WHISPER_EXPERIMENT_AUDIO_CTX 512
|
||||||
#define WHISPER_EXPERIMENT_MAX_TOKENS_PER_SEGMENT 32
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@ -205,6 +204,7 @@ extern "C" {
|
|||||||
float thold_pt; // timestamp token probability threshold (~0.01)
|
float thold_pt; // timestamp token probability threshold (~0.01)
|
||||||
float thold_ptsum; // timestamp token sum probability threshold (~0.01)
|
float thold_ptsum; // timestamp token sum probability threshold (~0.01)
|
||||||
int max_len; // max segment length in characters
|
int max_len; // max segment length in characters
|
||||||
|
int max_tokens; // max tokens per segment (0 = no limit)
|
||||||
|
|
||||||
// [EXPERIMENTAL] speed-up techniques
|
// [EXPERIMENTAL] speed-up techniques
|
||||||
bool speed_up; // speed-up the audio by 2x using Phase Vocoder
|
bool speed_up; // speed-up the audio by 2x using Phase Vocoder
|
||||||
|
Loading…
Reference in New Issue
Block a user