examples : add wer cli example

This commit add as suggestion for a Word Error Rate calculation
example.

The motivation for this is that this could be used for WER testing.
Usage:
```console
$ ./build/bin/whisper-wer
Usage: ./build/bin/whisper-wer [options]
Options:
  -r, --reference PATH   Full path to reference transcriptions directory
  -a, --actual PATH      Full path to actual transcriptions directory
  --help                 Display this help message
```

Example usage:
```console
$ ./build/bin/whisper-wer -r examples/wer/reference_transcriptions/ \
                          -a examples/wer/actual_transcriptions/
Word Error Rate for : jfk.wav.txt
  Reference words: 22
  Actual words: 22
  Substitutions: 1
  Deletions: 0
  Insertions: 0
  Total edits: 1
  WER: 0.045455
```

A more detailed description can be found in examples/wer/README.md.

A unit test is provided in tests/test-wer.cpp.
```console
$ cmake --build build --target test-wer && \
  ctest -R test-wer --test-dir build --output-on-failure
```
This commit is contained in:
Daniel Bevenius 2025-03-24 05:40:05 +01:00
parent 2c502b3c00
commit d4e11f1d41
10 changed files with 370 additions and 0 deletions

View File

@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
common-whisper.cpp
grammar-parser.h
grammar-parser.cpp
wer.h
wer.cpp
${COMMON_SOURCES_FFMPEG}
)
@ -114,6 +116,7 @@ else()
add_subdirectory(sycl)
endif()
endif (WHISPER_SDL2)
add_subdirectory(wer)
add_subdirectory(deprecation-warning)
endif()

114
examples/wer.cpp Normal file
View File

@ -0,0 +1,114 @@
#include "wer.h"
#include <cstdio>
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <sstream>
#include <tuple>
std::vector<std::string> split_into_words(const std::string& text) {
std::vector<std::string> words;
std::stringstream ss(text);
std::string word;
while (ss >> word) {
words.push_back(word);
}
return words;
}
std::tuple<int, int, int> count_edit_ops(const std::vector<std::string>& reference,
const std::vector<std::string>& actual) {
int m = reference.size();
int n = actual.size();
// Levenshtein matrix
std::vector<std::vector<int>> l_matrix(m + 1, std::vector<int>(n + 1, 0));
// Initialize the first row and column of the matrix.
for (int i = 0; i <= m; i++) {
l_matrix[i][0] = i;
}
for (int j = 0; j <= n; j++) {
l_matrix[0][j] = j;
}
// Fill the matrix.
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (reference[i-1] == actual[j-1]) {
l_matrix[i][j] = l_matrix[i-1][j-1];
} else {
l_matrix[i][j] = 1 + std::min({
l_matrix[i-1][j], // Deletion (top/above)
l_matrix[i][j-1], // Insertion (left)
l_matrix[i-1][j-1] // Substitution (diagonal)
});
}
}
}
// Start backtracking from the bottom-right corner of the matrix.
int i = m; // rows
int j = n; // columns
int substitutions = 0;
int deletions = 0;
int insertions = 0;
// Backtrack to find the edit operations.
while (i > 0 || j > 0) {
if (i > 0 && j > 0 && reference[i-1] == actual[j-1]) {
// Recalll that reference and actual are vectors, so this is just checking
// the same position in both to see if they are equal. If they are equal
// this means there was no edit operation, so we move diagonally.
i--;
j--;
} else if (i > 0 && j > 0 && l_matrix[i][j] == l_matrix[i-1][j-1] + 1) {
// Check the if the current cell is equal to the diagonal cell + 1
// (for the operation cost), which means we have a substitution.
substitutions++;
i--;
j--;
} else if (i > 0 && l_matrix[i][j] == l_matrix[i-1][j] + 1) {
// Check if the current cell is equal the top/above cell + 1
// (for the operation cost) which means we have a deletion.
deletions++;
i--;
} else {
// If there there was no match for the diagonal cell or the top/above
// cell, then we must be at the left cell, which means we have an insertion.
insertions++;
j--;
}
}
return {substitutions, deletions, insertions};
}
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text) {
std::vector<std::string> reference = split_into_words(reference_text);
std::vector<std::string> actual = split_into_words(actual_text);
auto [n_sub, n_del, n_ins] = count_edit_ops(reference, actual);
int n_edits = n_sub + n_del + n_ins;
double wer = 0.0;
if (!reference.empty()) {
wer = static_cast<double>(n_edits) / reference.size();
}
return wer_result{
/* n_ref_words */ reference.size(),
/* n_act_words */ actual.size(),
/* n_sub */ n_sub,
/* n_del */ n_del,
/* n_ins */ n_ins,
/* n_edits */ n_edits,
/* wer */ wer
};
}

18
examples/wer.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef WER_H
#define WER_H
#include <vector>
#include <string>
struct wer_result {
size_t n_ref_words; // Number of words in the reference text.
size_t n_act_words; // Number of words in the actual (transcribed) text.
int n_sub; // Number of substitutions.
int n_del; // Number of deletions.
int n_ins; // Number of insertions.
int n_edits; // Total number of edits.
double wer; // The word error rate.
};
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text);
#endif // WER_H

View File

@ -0,0 +1,8 @@
set(TARGET whisper-wer)
add_executable(${TARGET} cli.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ${TARGET} RUNTIME)

51
examples/wer/README.md Normal file
View File

@ -0,0 +1,51 @@
# whisper.cpp/examples/wer
This is a command line tool for calculating the Word Error Rate (WER). This tool
expects that reference transcriptions (the known correct transcriptions)
and acutual transcriptions from whisper.cpp are available in two separate
directories where the file names are the identical.
### Usage
```console
$ ./build/bin/whisper-wer
Usage: ./build/bin/whisper-wer [options]
Options:
-r, --reference PATH Full path to reference transcriptions directory
-a, --actual PATH Full path to actual transcriptions directory
--help Display this help message
```
### Example Usage with whisper-cli
First, generate transcription(s) using whisper-cli:
```
./build/bin/whisper-cli \
-m models/ggml-base.en.bin \
-f samples/jfk.wav \
--output-txt
...
output_txt: saving output to 'samples/jfk.wav.txt'
```
Next, copy the transcription to a directory where the actual transcriptions
are stored. In this example we will use a directory called `actual_transcriptions`
in this examples directory:
```console
$ cp samples/jfk.wav.txt examples/wer/actual_transcriptions
```
In a real world scenario the reference transcriptions would be available
representing the known correct text. In this case we have already placed a file
in `examples/wer/reference_transcriptions` that can be used for testing, where
only a single word was changed (`Americans` -> `Swedes`).
Finally, run the whisper-wer tool:
```console
$ ./build/bin/whisper-wer -r examples/wer/reference_transcriptions/ -a examples/wer/actual_transcriptions/
Word Error Rate for : jfk.wav.txt
Reference words: 22
Actual words: 22
Substitutions: 1
Deletions: 0
Insertions: 0
Total edits: 1
WER: 0.045455
```

View File

@ -0,0 +1 @@
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

148
examples/wer/cli.cpp Normal file
View File

@ -0,0 +1,148 @@
#include "wer.h"
#include <cstdio>
#include <vector>
#include <string>
#include <filesystem>
#include <fstream>
#include <cstring>
#include <map>
std::vector<std::string> read_files_from_directory(const std::string& dir_path) {
std::vector<std::string> file_paths;
try {
for (const auto& entry : std::filesystem::directory_iterator(dir_path)) {
if (entry.is_regular_file() && entry.path().extension() == ".txt") {
file_paths.push_back(entry.path().string());
}
}
} catch (const std::filesystem::filesystem_error& e) {
printf("Error reading directory %s: %s\n", dir_path.c_str(), e.what());
}
return file_paths;
}
std::string read_file_content(const std::string& file_path) {
std::ifstream file(file_path);
std::string content;
if (file.is_open()) {
std::string line;
while (std::getline(file, line)) {
content += line + "\n";
}
file.close();
} else {
printf("Unable to open file: %s\n", file_path.c_str());
}
return content;
}
std::string get_base_filename(const std::string& path) {
return std::filesystem::path(path).filename().string();
}
void print_usage(const char* program_name) {
printf("Usage: %s [options]\n", program_name);
printf("Options:\n");
printf(" -r, --reference PATH Full path to reference transcriptions directory\n");
printf(" -a, --actual PATH Full path to actual transcriptions directory\n");
printf(" --help Display this help message\n");
}
int main(int argc, char** argv) {
if (argc == 1) {
print_usage(argv[0]);
return 0;
}
std::string reference_path;
std::string actual_path;
bool reference_set = false;
bool actual_set = false;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--help") == 0) {
print_usage(argv[0]);
return 0;
} else if (strcmp(argv[i], "-r") == 0 || strcmp(argv[i], "--reference") == 0) {
if (i + 1 < argc) {
reference_path = argv[++i];
reference_set = true;
} else {
printf("Error: Missing path after %s\n", argv[i]);
print_usage(argv[0]);
return 1;
}
} else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--actual") == 0) {
if (i + 1 < argc) {
actual_path = argv[++i];
actual_set = true;
} else {
printf("Error: Missing path after %s\n", argv[i]);
print_usage(argv[0]);
return 1;
}
} else {
printf("Error: Unknown option: %s\n", argv[i]);
print_usage(argv[0]);
return 1;
}
}
if (!reference_set || !actual_set) {
printf("Error: Both reference and actual paths must be provided\n");
print_usage(argv[0]);
return 1;
}
if (!std::filesystem::exists(reference_path) || !std::filesystem::is_directory(reference_path)) {
printf("Error: Reference path '%s' does not exist or is not a directory\n", reference_path.c_str());
return 1;
}
if (!std::filesystem::exists(actual_path) || !std::filesystem::is_directory(actual_path)) {
printf("Error: Actual path '%s' does not exist or is not a directory\n", actual_path.c_str());
return 1;
}
std::vector<std::string> reference_files = read_files_from_directory(reference_path);
std::vector<std::string> actual_files = read_files_from_directory(actual_path);
//printf("Found %zu reference files in %s\n", reference_files.size(), reference_path.c_str());
//printf("Found %zu actual files in %s\n", actual_files.size(), actual_path.c_str());
std::map<std::string, std::string> reference_map;
std::map<std::string, std::string> actual_map;
for (const auto& file : reference_files) {
reference_map[get_base_filename(file)] = file;
}
for (const auto& file : actual_files) {
actual_map[get_base_filename(file)] = file;
}
for (const auto& [filename, ref_path] : reference_map) {
auto actual_it = actual_map.find(filename);
if (actual_it != actual_map.end()) {
std::string reference_content = read_file_content(ref_path);
std::string actual_content = read_file_content(actual_it->second);
wer_result result = calculate_wer(reference_content, actual_content);
printf("Word Error Rate for : %s\n", filename.c_str());
printf(" Reference words: %ld\n", result.n_ref_words);
printf(" Actual words: %ld\n", result.n_act_words);
printf(" Substitutions: %d\n", result.n_sub);
printf(" Deletions: %d\n", result.n_del);
printf(" Insertions: %d\n", result.n_ins);
printf(" Total edits: %d\n", result.n_edits);
printf(" WER: %f\n", result.wer);
} else {
printf("Warning: No matching actual file found for reference file: %s\n", filename.c_str());
}
}
return 0;
}

View File

@ -0,0 +1 @@
And so my fellow Swedes, ask not what your country can do for you, ask what you can do for your country.

View File

@ -85,3 +85,9 @@ if (WHISPER_FFMPEG)
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;mp3")
endif()
# WER Unit Test
add_executable(test-wer test-wer.cpp)
target_include_directories(test-wer PRIVATE ../examples)
target_link_libraries(test-wer PRIVATE common)
add_test(NAME test-wer COMMAND test-wer)
set_tests_properties(test-wer PROPERTIES LABELS "unit")

20
tests/test-wer.cpp Normal file
View File

@ -0,0 +1,20 @@
#include "wer.h"
#include <cassert>
#include <cstdio>
int main() {
std::string reference = "the cat sat on the mat";
std::string actual = "the cat sat mat";
wer_result result = calculate_wer(reference, actual);
assert(result.n_ref_words == 6);
assert(result.n_act_words == 4);
assert(result.n_sub == 0);
assert(result.n_del == 2);
assert(result.n_ins == 0);
assert(result.n_edits == 2);
assert(std::abs(result.wer - 0.333333) < 0.0001);
return 0;
}