Mirror of https://github.com/mudler/LocalAI.git (synced 2025-05-07 19:18:33 +00:00)
fix(llama.cpp): enable cont batching when parallel is set (#1622)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent 94261b1717
commit 697c769b64
@@ -2465,10 +2465,10 @@ static void params_parse(const backend::ModelOptions* request,
     const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
     if (env_parallel != NULL) {
         params.n_parallel = std::stoi(env_parallel);
+        params.cont_batching = true;
     } else {
         params.n_parallel = 1;
     }
-    params.cont_batching = true;
     // TODO: Add yarn
 
     if (!request->tensorsplit().empty()) {
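For context, a minimal, self-contained C++ sketch of the behaviour this diff produces: continuous batching is switched on only when the LLAMACPP_PARALLEL environment variable is present, instead of unconditionally. The Params struct and parse_parallel_env helper below are illustrative stand-ins (not LocalAI's actual gpt_params or params_parse), reduced to the two fields this commit touches.

// Sketch only: mirrors the post-commit logic with a hypothetical Params struct.
#include <cstdlib>
#include <iostream>
#include <string>

struct Params {
    int  n_parallel    = 1;     // number of parallel sequences/slots
    bool cont_batching = false; // continuous batching off by default
};

static void parse_parallel_env(Params &params) {
    const char *env_parallel = std::getenv("LLAMACPP_PARALLEL");
    if (env_parallel != NULL) {
        // Parallel slots requested: honour the count and enable continuous
        // batching, as this commit does inside params_parse.
        params.n_parallel    = std::stoi(env_parallel);
        params.cont_batching = true;
    } else {
        // No parallelism requested: single sequence, cont batching stays off.
        params.n_parallel = 1;
    }
}

int main() {
    Params params;
    parse_parallel_env(params);
    std::cout << "n_parallel=" << params.n_parallel
              << " cont_batching=" << std::boolalpha << params.cont_batching
              << std::endl;
}

Run with LLAMACPP_PARALLEL=4 set in the environment and the sketch prints n_parallel=4 cont_batching=true; without the variable it falls back to a single sequence with continuous batching left disabled.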