fix(llama.cpp): enable cont batching when parallel is set (#1622)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
commit 697c769b64
parent 94261b1717
@@ -2465,10 +2465,10 @@ static void params_parse(const backend::ModelOptions* request,
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
     if (env_parallel != NULL) {
         params.n_parallel = std::stoi(env_parallel);
+        params.cont_batching = true;
     } else {
         params.n_parallel = 1;
     }
-    params.cont_batching = true;
     // TODO: Add yarn
 
     if (!request->tensorsplit().empty()) {