chore(deps): update llama.cpp (#3438)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 06:58:09 +00:00 · 2024-08-31 01:21:45 +02:00
parent 72f97e62bb
commit b8e7a76524
2 changed files with 3 additions and 3 deletions
--- a/2
+++ b/2
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=9fe94ccac92693d4ae1bc283ff0574e8b3f4e765
+CPPLLAMA_VERSION?=0ab30f8d82fc7156b750c194d64a887e80cbfb82

 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -1119,7 +1119,7 @@ struct llama_server_context
                continue;
            }

-            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+            if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
                LOG_TEE("Error processing the given image");
                return false;
            }
@@ -2210,7 +2210,7 @@ static void params_parse(const backend::ModelOptions* request,
    params.model_alias =  request->modelfile();
    params.n_ctx = request->contextsize();
    //params.memory_f16 = request->f16memory();
-    params.n_threads = request->threads();
+    params.cpuparams.n_threads = request->threads();
    params.n_gpu_layers = request->ngpulayers();
    params.n_batch = request->nbatch();
    // Set params.n_parallel by environment variable (LLAMA_PARALLEL), defaults to 1