mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2024-12-18 20:27:53 +00:00
whisper : add whisper_token_count helper
This commit is contained in:
parent
5c2c07d479
commit
ba69578828
@ -3731,6 +3731,10 @@ int whisper_tokenize(struct whisper_context * ctx, const char * text, whisper_to
|
|||||||
return res.size();
|
return res.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int whisper_token_count(struct whisper_context * ctx, const char * text) {
|
||||||
|
return -whisper_tokenize(ctx, text, NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
int whisper_lang_max_id() {
|
int whisper_lang_max_id() {
|
||||||
auto max_id = 0;
|
auto max_id = 0;
|
||||||
for (const auto & kv : g_lang) {
|
for (const auto & kv : g_lang) {
|
||||||
|
@ -345,6 +345,10 @@ extern "C" {
|
|||||||
whisper_token * tokens,
|
whisper_token * tokens,
|
||||||
int n_max_tokens);
|
int n_max_tokens);
|
||||||
|
|
||||||
|
// Return the number of tokens in the provided text
|
||||||
|
// Equivalent to: -whisper_tokenize(ctx, text, NULL, 0)
|
||||||
|
int whisper_token_count(struct whisper_context * ctx, const char * text);
|
||||||
|
|
||||||
// Largest language id (i.e. number of available languages - 1)
|
// Largest language id (i.e. number of available languages - 1)
|
||||||
WHISPER_API int whisper_lang_max_id();
|
WHISPER_API int whisper_lang_max_id();
|
||||||
|
|
||||||
@ -504,7 +508,7 @@ extern "C" {
|
|||||||
// tokens to provide to the whisper decoder as initial prompt
|
// tokens to provide to the whisper decoder as initial prompt
|
||||||
// these are prepended to any existing text context from a previous call
|
// these are prepended to any existing text context from a previous call
|
||||||
// use whisper_tokenize() to convert text to tokens
|
// use whisper_tokenize() to convert text to tokens
|
||||||
// maximum of whisper_n_text_ctx()/2 tokens are used
|
// maximum of whisper_n_text_ctx()/2 tokens are used (typically 224)
|
||||||
const char * initial_prompt;
|
const char * initial_prompt;
|
||||||
const whisper_token * prompt_tokens;
|
const whisper_token * prompt_tokens;
|
||||||
int prompt_n_tokens;
|
int prompt_n_tokens;
|
||||||
|
Loading…
Reference in New Issue
Block a user