mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-13 12:38:11 +00:00
feat(llama.cpp): expose cache_type_k and cache_type_v for quant of kv cache (#4329)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
committed by
GitHub
parent
88737e1d76
commit
d4c1746c7d
@ -2241,6 +2241,12 @@ static void params_parse(const backend::ModelOptions* request,
|
||||
}
|
||||
// params.model_alias ??
|
||||
params.model_alias = request->modelfile();
|
||||
if (!request->cachetypekey().empty()) {
|
||||
params.cache_type_k = request->cachetypekey();
|
||||
}
|
||||
if (!request->cachetypevalue().empty()) {
|
||||
params.cache_type_v = request->cachetypevalue();
|
||||
}
|
||||
params.n_ctx = request->contextsize();
|
||||
//params.memory_f16 = request->f16memory();
|
||||
params.cpuparams.n_threads = request->threads();
|
||||
|
Reference in New Issue
Block a user