mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-18 18:56:35 +00:00
whisper : split_on_word no longer trims (#1046)
This commit is contained in:
parent
3f7a03ebe3
commit
72deb41eb2
28
whisper.cpp
28
whisper.cpp
@ -3401,26 +3401,6 @@ static void whisper_exp_compute_token_level_timestamps(
|
||||
float thold_pt,
|
||||
float thold_ptsum);
|
||||
|
||||
// Remove leading whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for each byte; the predicate
// takes an unsigned char so that bytes >= 0x80 are passed to it safely.
static inline void ltrim(std::string &s) {
    const auto is_blank = [](unsigned char c) {
        return std::isspace(c);
    };
    // First character that must be kept (or end() if the string is all blank).
    const auto first_kept = std::find_if_not(s.begin(), s.end(), is_blank);
    s.erase(s.begin(), first_kept);
}
|
||||
|
||||
// Remove trailing whitespace from `s`, modifying the string in place.
// Whitespace is whatever std::isspace reports for each byte; the predicate
// takes an unsigned char so that bytes >= 0x80 are passed to it safely.
static inline void rtrim(std::string &s) {
    const auto is_blank = [](unsigned char c) {
        return std::isspace(c);
    };
    // Scan backwards for the last character to keep; base() converts the
    // reverse iterator into the forward position just past that character.
    const auto last_kept = std::find_if_not(s.rbegin(), s.rend(), is_blank);
    s.erase(last_kept.base(), s.end());
}
|
||||
|
||||
// trim from both ends (in place)
|
||||
static inline void trim(std::string &s) {
|
||||
rtrim(s);
|
||||
ltrim(s);
|
||||
}
|
||||
|
||||
static inline bool should_split_on_word(const char * txt, bool split_on_word) {
|
||||
if (!split_on_word) return true;
|
||||
|
||||
@ -3447,11 +3427,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
||||
const int cur = strlen(txt);
|
||||
|
||||
if (acc + cur > max_len && i > 0 && should_split_on_word(txt, split_on_word)) {
|
||||
// split here
|
||||
if (split_on_word) {
|
||||
trim(text);
|
||||
}
|
||||
|
||||
state.result_all.back().text = std::move(text);
|
||||
state.result_all.back().t1 = token.t0;
|
||||
state.result_all.back().tokens.resize(i);
|
||||
@ -3479,9 +3454,6 @@ static int whisper_wrap_segment(struct whisper_context & ctx, struct whisper_sta
|
||||
}
|
||||
}
|
||||
|
||||
if (split_on_word) {
|
||||
trim(text);
|
||||
}
|
||||
state.result_all.back().text = std::move(text);
|
||||
|
||||
return res;
|
||||
|
Loading…
Reference in New Issue
Block a user