From 72deb41eb26300f71c50febe29db8ffcce09256c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 25 Jun 2023 23:51:01 +0300 Subject: [PATCH] whisper : `split_on_word` no longer trims (#1046) --- whisper.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/whisper.cpp b/whisper.cpp index 74cfd7b2..5f3888c7 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -3401,26 +3401,6 @@ static void whisper_exp_compute_token_level_timestamps( float thold_pt, float thold_ptsum); -// trim from start (in place) -static inline void ltrim(std::string &s) { - s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), [](unsigned char ch) { - return std::isspace(ch); - })); -} - -// trim from end (in place) -static inline void rtrim(std::string &s) { - s.erase(std::find_if_not(s.rbegin(), s.rend(), [](unsigned char ch) { - return std::isspace(ch); - }).base(), s.end()); -} - -// trim from both ends (in place) -static inline void trim(std::string &s) { - rtrim(s); - ltrim(s); -} - static inline bool should_split_on_word(const char * txt, bool split_on_word) { if (!split_on_word) return true; @@ -3447,11 +3427,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta const int cur = strlen(txt); if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) { - // split here - if (split_on_word) { - trim(text); - } - state.result_all.back().text = std::move(text); state.result_all.back().t1 = token.t0; state.result_all.back().tokens.resize(i); @@ -3479,9 +3454,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta } } - if (split_on_word) { - trim(text); - } state.result_all.back().text = std::move(text); return res;