talk-llama : sync llama.cpp

This commit is contained in:
Georgi Gerganov
2024-01-27 17:24:53 +02:00
parent 7fe3ed5e00
commit ef3c9ed9eb
3 changed files with 832 additions and 276 deletions

View File

@ -107,6 +107,7 @@ extern "C" {
LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
};
@ -774,6 +775,14 @@ extern "C" {
float p,
size_t min_keep);
/// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.
/// NOTE(review): only the declaration is visible here; the parameter notes below are
/// inferred from the parameter names and types and must be confirmed against the implementation.
/// @param ctx          llama context handed to the sampler.
/// @param candidates_p Candidate token array; passed as a non-const pointer, so presumably updated in place - TODO confirm.
/// @param min_temp     Presumably the lower bound of the temperature range - TODO confirm.
/// @param max_temp     Presumably the upper bound of the temperature range - TODO confirm.
/// @param exponent_val Presumably an exponent shaping how entropy maps onto the temperature range - TODO confirm.
LLAMA_API void llama_sample_entropy(
struct llama_context * ctx,
llama_token_data_array * candidates_p,
float min_temp,
float max_temp,
float exponent_val);
LLAMA_API void llama_sample_temp(
struct llama_context * ctx,
llama_token_data_array * candidates,