From a593b932e4233df0d4a9e884ad5847420dfed0f8 Mon Sep 17 00:00:00 2001
From: Niels Mayer
Date: Thu, 29 Dec 2022 04:04:00 -0800
Subject: [PATCH] main : add -ocsv, aka --output-csv to output a CSV file

Adds -ocsv, aka --output-csv feature to examples/main, which outputs a CSV
file containing one line per segment, formatted as follows:

    START_MS, END_MS, "TEXT"

START_MS and END_MS are integer milliseconds. TEXT is the segment text
without its leading space, enclosed in double quotes, with any embedded
double quote doubled.
---
 examples/main/main.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6e991b79..ce8b484d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -69,6 +69,7 @@ struct whisper_params {
     bool output_vtt = false;
     bool output_srt = false;
     bool output_wts = false;
+    bool output_csv = false;
     bool print_special = false;
     bool print_colors = false;
     bool print_progress = false;
@@ -111,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
         else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
         else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
+        else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
         else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
         else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
         else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
@@ -150,6 +152,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
     fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
     fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
     fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
+    fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
     fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
     fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
     fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
@@ -325,6 +328,48 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
     return true;
 }
 
+// writes one CSV row per segment: START_MS, END_MS, "TEXT"
+// returns false if the file cannot be opened or written
+bool output_csv(struct whisper_context * ctx, const char * fname) {
+    std::ofstream fout(fname);
+    if (!fout.is_open()) {
+        fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
+        return false;
+    }
+
+    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
+
+    const int n_segments = whisper_full_n_segments(ctx);
+    for (int i = 0; i < n_segments; ++i) {
+        const char * text = whisper_full_get_segment_text(ctx, i);
+        // whisper_full_get_segment_text() returns a string with leading space, skip it
+        if (text[0] == ' ') {
+            ++text;
+        }
+
+        // double embedded quotes so that the quoted field stays valid CSV
+        std::string text_csv;
+        for (const char * p = text; *p != '\0'; ++p) {
+            if (*p == '"') {
+                text_csv += '"';
+            }
+            text_csv += *p;
+        }
+
+        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
+        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
+
+        // need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds
+        fout << 10 * t0 << ", "
+             << 10 * t1 << ", \""
+             << text_csv << "\"\n";
+    }
+
+    // flush so that a failed write is reported to the caller
+    fout.flush();
+    return fout.good();
+}
+
 // karaoke video generation
 // outputs a bash script that uses ffmpeg to generate a video with the subtitles
 // TODO: font parameter adjustments
@@ -674,6 +719,13 @@ int main(int argc, char ** argv) {
                 const auto fname_wts = fname_inp + ".wts";
                 output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
             }
+
+            // output to CSV file
+            if (params.output_csv) {
+                const auto fname_csv = fname_inp + ".csv";
+                output_csv(ctx, fname_csv.c_str());
+            }
+
         }
     }
 