talk-llama : llama.cpp

2025-06-17 22:38:07 +00:00 · 2024-01-14 11:06:28 +02:00
parent d08445c9ad
commit 2a5874441d
2 changed files with 132 additions and 36 deletions
--- a/examples/talk-llama/llama.h
+++ b/examples/talk-llama/llama.h
@@ -249,6 +249,7 @@ extern "C" {
        bool quantize_output_tensor; // quantize output.weight
        bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        bool pure;                   // disable k-quant mixtures and quantize all tensors to the same type
+        void * imatrix;              // pointer to importance matrix data
    } llama_model_quantize_params;

    // grammar types