talk-llama : llama.cpp

This commit is contained in:
Georgi Gerganov
2024-01-14 11:06:28 +02:00
parent d08445c9ad
commit 2a5874441d
2 changed files with 132 additions and 36 deletions

View File

@@ -249,6 +249,7 @@ extern "C" {
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
bool pure; // disable k-quant mixtures and quantize all tensors to the same type
void * imatrix; // pointer to importance matrix data
} llama_model_quantize_params;
// grammar types