server : add no-speech threshold parameter and functionality (#2654)

This commit is contained in:
Sacha Arbonel
2024-12-21 16:00:08 +01:00
committed by GitHub
parent f4668169a0
commit 4183517076
3 changed files with 15 additions and 3 deletions

View File

@ -428,6 +428,7 @@ struct whisper_segment {
int64_t t1;
std::string text;
float no_speech_prob;
std::vector<whisper_token_data> tokens;
@ -6147,7 +6148,7 @@ int whisper_full_with_state(
//printf("tt0 = %d, tt1 = %d, text = %s, token = %s, token_id = %d, tid = %d\n", tt0, tt1, text.c_str(), ctx->vocab.id_to_token[tokens_cur[i].id].c_str(), tokens_cur[i].id, tokens_cur[i].tid);
result_all.push_back({ tt0, tt1, text, {}, speaker_turn_next });
result_all.push_back({ tt0, tt1, text, state->no_speech_prob, {}, speaker_turn_next });
for (int j = i0; j <= i; j++) {
result_all.back().tokens.push_back(tokens_cur[j]);
}
@ -6192,7 +6193,7 @@ int whisper_full_with_state(
}
}
result_all.push_back({ tt0, tt1, text, {} , speaker_turn_next });
result_all.push_back({ tt0, tt1, text, state->no_speech_prob, {}, speaker_turn_next });
for (int j = i0; j < (int) tokens_cur.size(); j++) {
result_all.back().tokens.push_back(tokens_cur[j]);
}
@ -6459,6 +6460,10 @@ float whisper_full_get_token_p(struct whisper_context * ctx, int i_segment, int
return ctx->state->result_all[i_segment].tokens[i_token].p;
}
// Returns the no-speech probability stored on segment `i_segment` of the
// results held in ctx->state. The index is used as-is; no range check is
// performed here, so the caller must pass a valid segment index.
float whisper_full_get_segment_no_speech_prob(struct whisper_context * ctx, int i_segment) {
    const auto & segment = ctx->state->result_all[i_segment];
    return segment.no_speech_prob;
}
// =================================================================================================
//