From f389d7e3e56bbbfec49fd333551927a0fcbb7213 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 14 May 2025 19:21:48 +0200 Subject: [PATCH] examples : add --print-confidence option to cli (#3150) * examples : add --print-confidence option to cli This commit adds a new command-line option `--print-confidence` to the whisper-cli. When enabled, this option prints the confidence level of each token in the transcribed text using ANSI formatting codes. The confidence levels are represented using different styles: ```console main: confidence: highlighted (low confidence), underlined (medium), dim (high confidence) ``` Refs: https://github.com/ggml-org/whisper.cpp/issues/3135 --- examples/cli/cli.cpp | 25 +++++++++++++++++++++++++ examples/common.h | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp index 7d5d0ffe..2a0f940d 100644 --- a/examples/cli/cli.cpp +++ b/examples/cli/cli.cpp @@ -70,6 +70,7 @@ struct whisper_params { bool no_prints = false; bool print_special = false; bool print_colors = false; + bool print_confidence= false; bool print_progress = false; bool no_timestamps = false; bool log_score = false; @@ -179,6 +180,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params else if (arg == "-np" || arg == "--no-prints") { params.no_prints = true; } else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; } else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; } + else if ( arg == "--print-confidence"){ params.print_confidence= true; } else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; } else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; } else if (arg == "-l" || arg == "--language") { params.language = whisper_param_turn_lowercase(ARGV_NEXT); } @@ -257,6 +259,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params 
fprintf(stderr, " -np, --no-prints [%-7s] do not print anything other than the results\n", params.no_prints ? "true" : "false"); fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false"); fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false"); + fprintf(stderr, " --print-confidence [%-7s] print confidence\n", params.print_confidence ? "true" : "false"); fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false"); fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false"); fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str()); @@ -386,6 +389,26 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m"); } + } else if (params.print_confidence) { + for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) { + if (params.print_special == false) { + const whisper_token id = whisper_full_get_token_id(ctx, i, j); + if (id >= whisper_token_eot(ctx)) { + continue; + } + } + + const char * text = whisper_full_get_token_text(ctx, i, j); + const float p = whisper_full_get_token_p (ctx, i, j); + + int style_idx = 2; // High confidence - dim + if (p < 0.33) { + style_idx = 0; // Low confidence - inverse (highlighted) + } else if (p < 0.66) { + style_idx = 1; // Medium confidence - underlined + } + printf("%s%s%s%s", speaker.c_str(), k_styles[style_idx].c_str(), text, "\033[0m"); + } } else { const char * text = whisper_full_get_segment_text(ctx, i); @@ -1115,6 +1138,8 @@ int main(int argc, char ** argv) { if (params.print_colors) { fprintf(stderr, "%s: color scheme: red (low confidence), yellow (medium), green (high confidence)\n", __func__); + } else if (params.print_confidence) { + fprintf(stderr, "%s: confidence: 
highlighted (low confidence), underlined (medium), dim (high confidence)\n", __func__); } fprintf(stderr, "\n"); } diff --git a/examples/common.h b/examples/common.h index 1aa76381..8f99df7d 100644 --- a/examples/common.h +++ b/examples/common.h @@ -294,6 +294,26 @@ const std::vector<std::string> k_colors = { set_xterm256_foreground( 78, 178, 101), }; +// ANSI formatting codes +static std::string set_inverse() { + return "\033[7m"; +} + +static std::string set_underline() { + return "\033[4m"; +} + +static std::string set_dim() { + return "\033[2m"; +} + +// Style scheme for different confidence levels +const std::vector<std::string> k_styles = { + set_inverse(), // Low confidence - inverse (highlighted) + set_underline(), // Medium confidence - underlined + set_dim(), // High confidence - dim +}; + // // Other utils //