mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-13 04:28:07 +00:00
whisper : suppress tokens with a regex (#1997)
* Allow a regular expression to describe tokens to suppress. Example: --suppress-tokens-re "[,\.]|[ ]?[0-9]+" will suppress commas, periods, and numeric tokens. Technique inspired by https://github.com/openai/whisper/discussions/1041 Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Blind change to fix Java test. --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@ -52,6 +52,9 @@ struct whisper_params {
|
||||
std::string prompt;
|
||||
std::string context;
|
||||
std::string grammar;
|
||||
|
||||
// A regular expression that matches tokens to suppress
|
||||
std::string suppress_regex;
|
||||
};
|
||||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
@ -85,6 +88,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
else if (arg == "-ctx" || arg == "--context") { params.context = argv[++i]; }
|
||||
else if ( arg == "--grammar") { params.grammar = argv[++i]; }
|
||||
else if ( arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
|
||||
else if ( arg == "--suppress-regex") { params.suppress_regex = argv[++i]; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
@ -122,6 +126,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
||||
fprintf(stderr, " -ctx, --context [%-7s] sample text to help the transcription\n", params.context.c_str());
|
||||
fprintf(stderr, " --grammar GRAMMAR [%-7s] GBNF grammar to guide decoding\n", params.grammar.c_str());
|
||||
fprintf(stderr, " --grammar-penalty N [%-7.1f] scales down logits of nongrammar tokens\n", params.grammar_penalty);
|
||||
fprintf(stderr, " --suppress-regex REGEX [%-7s] regular expression matching tokens to suppress\n", params.suppress_regex.c_str());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
@ -167,6 +172,8 @@ std::string transcribe(
|
||||
|
||||
wparams.initial_prompt = params.context.data();
|
||||
|
||||
wparams.suppress_regex = params.suppress_regex.c_str();
|
||||
|
||||
const auto & grammar_parsed = params.grammar_parsed;
|
||||
auto grammar_rules = grammar_parsed.c_rules();
|
||||
|
||||
|
Reference in New Issue
Block a user