From 4a7d49af9550725b5095fdcba3306a075b3ea32e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 30 Apr 2023 19:12:49 +0300
Subject: [PATCH] examples : fix + refactor Levenshtein distance

---
Note: the two removed copies of similarity() compared s0[i - 1] on the first
outer iteration (i == 0), where i - 1 wraps around for size_t. The single
shared copy added to examples/common.cpp guards that comparison with
`i > 0 &&`. Indentation and blank context lines in this copy of the patch
were restored by hand; use `git apply --ignore-whitespace` if a hunk is
rejected.

 examples/command.wasm/emscripten.cpp | 25 -------------------------
 examples/command/command.cpp         | 25 -------------------------
 examples/common.cpp                  | 24 ++++++++++++++++++++++++
 examples/common.h                    |  2 ++
 4 files changed, 26 insertions(+), 50 deletions(-)

diff --git a/examples/command.wasm/emscripten.cpp b/examples/command.wasm/emscripten.cpp
index 1cfd0637..e739656d 100644
--- a/examples/command.wasm/emscripten.cpp
+++ b/examples/command.wasm/emscripten.cpp
@@ -28,31 +28,6 @@ std::string g_transcribed = "";
 
 std::vector<float> g_pcmf32;
 
-// compute similarity between two strings using Levenshtein distance
-static float similarity(const std::string & s0, const std::string & s1) {
-    const size_t len0 = s0.size() + 1;
-    const size_t len1 = s1.size() + 1;
-
-    std::vector<int> col(len1, 0);
-    std::vector<int> prevCol(len1, 0);
-
-    for (size_t i = 0; i < len1; i++) {
-        prevCol[i] = i;
-    }
-
-    for (size_t i = 0; i < len0; i++) {
-        col[0] = i;
-        for (size_t j = 1; j < len1; j++) {
-            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
-        }
-        col.swap(prevCol);
-    }
-
-    const float dist = prevCol[len1 - 1];
-
-    return 1.0f - (dist / std::max(s0.size(), s1.size()));
-}
-
 void command_set_status(const std::string & status) {
     std::lock_guard<std::mutex> lock(g_mutex);
     g_status = status;
diff --git a/examples/command/command.cpp b/examples/command/command.cpp
index 2b9440a8..54e3549f 100644
--- a/examples/command/command.cpp
+++ b/examples/command/command.cpp
@@ -163,31 +163,6 @@ std::string transcribe(whisper_context * ctx, const whisper_params & params, con
     return result;
 }
 
-// compute similarity between two strings using Levenshtein distance
-float similarity(const std::string & s0, const std::string & s1) {
-    const size_t len0 = s0.size() + 1;
-    const size_t len1 = s1.size() + 1;
-
-    std::vector<int> col(len1, 0);
-    std::vector<int> prevCol(len1, 0);
-
-    for (size_t i = 0; i < len1; i++) {
-        prevCol[i] = i;
-    }
-
-    for (size_t i = 0; i < len0; i++) {
-        col[0] = i;
-        for (size_t j = 1; j < len1; j++) {
-            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (s0[i - 1] == s1[j - 1] ? 0 : 1));
-        }
-        col.swap(prevCol);
-    }
-
-    const float dist = prevCol[len1 - 1];
-
-    return 1.0f - (dist / std::max(s0.size(), s1.size()));
-}
-
 std::vector<std::string> read_allowed_commands(const std::string & fname) {
     std::vector<std::string> allowed_commands;
 
diff --git a/examples/common.cpp b/examples/common.cpp
index ed31efe9..019a8efa 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -479,3 +479,27 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
 
     return true;
 }
+
+float similarity(const std::string & s0, const std::string & s1) {
+    const size_t len0 = s0.size() + 1;
+    const size_t len1 = s1.size() + 1;
+
+    std::vector<int> col(len1, 0);
+    std::vector<int> prevCol(len1, 0);
+
+    for (size_t i = 0; i < len1; i++) {
+        prevCol[i] = i;
+    }
+
+    for (size_t i = 0; i < len0; i++) {
+        col[0] = i;
+        for (size_t j = 1; j < len1; j++) {
+            col[j] = std::min(std::min(1 + col[j - 1], 1 + prevCol[j]), prevCol[j - 1] + (i > 0 && s0[i - 1] == s1[j - 1] ? 0 : 1));
+        }
+        col.swap(prevCol);
+    }
+
+    const float dist = prevCol[len1 - 1];
+
+    return 1.0f - (dist / std::max(s0.size(), s1.size()));
+}
diff --git a/examples/common.h b/examples/common.h
index b08e5760..48252cee 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -118,3 +118,5 @@ bool vad_simple(
         float freq_thold,
         bool verbose);
 
+// compute similarity between two strings using Levenshtein distance
+float similarity(const std::string & s0, const std::string & s1);