examples : fix n_gpu_layers usage in talk-llama (#1441)

Jhen-Jie Hong 2023-11-07 09:36:23 +08:00 committed by GitHub
parent 0463028bc2
commit 3989b29a9b


@@ -266,6 +266,9 @@ int main(int argc, char ** argv) {
     llama_backend_init(true);

     auto lmparams = llama_model_default_params();
+    if (!params.use_gpu) {
+        lmparams.n_gpu_layers = 0;
+    }

     struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
@@ -276,9 +279,6 @@ int main(int argc, char ** argv) {
     lcparams.seed       = 1;
     lcparams.f16_kv     = true;
     lcparams.n_threads  = params.n_threads;
-    if (!params.use_gpu) {
-        lcparams.n_gpu_layers = 0;
-    }

     struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);
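For reference, a minimal sketch of the resulting initialization, assuming the llama.cpp C API of this period (llama_model_default_params, llama_context_default_params with its f16_kv field) and talk-llama's params struct as shown in the diff: n_gpu_layers belongs to the model parameters consumed by llama_load_model_from_file, not to the context parameters, so GPU offloading has to be disabled before the model is loaded rather than on lcparams.

    // Sketch of the corrected setup, not the verbatim file contents.
    llama_backend_init(true);

    auto lmparams = llama_model_default_params();
    if (!params.use_gpu) {
        lmparams.n_gpu_layers = 0;   // model params control GPU offload; 0 keeps every layer on the CPU
    }

    struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);

    auto lcparams = llama_context_default_params();
    lcparams.seed      = 1;
    lcparams.f16_kv    = true;                // context params keep only context-level options
    lcparams.n_threads = params.n_threads;

    struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);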