diff --git a/Makefile b/Makefile
index 11904d1b..fd9c7627 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=2f0ee84b9b02d2a98742308026f060ebdc2423f1
+CPPLLAMA_VERSION?=4b0c638b9a68f577cb2066b638c9f622d91ee661
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 98dd8fde..7632aebc 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -492,8 +492,8 @@ struct llama_server_context
     }
 
     common_init_result common_init = common_init_from_params(params);
-    model = common_init.model;
-    ctx = common_init.context;
+    model = common_init.model.release();
+    ctx = common_init.context.release();
     if (model == nullptr)
     {
         LOG_ERR("unable to load model: %s", params.model.c_str());
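
Context note (not part of the diff): the bumped CPPLLAMA_VERSION appears to change common_init_result so that its model and context members are owning smart pointers rather than raw pointers, which is why the server now calls .release() before storing them in its long-lived raw-pointer fields. The sketch below illustrates that ownership transfer with hypothetical stand-in types (fake_model, fake_context, fake_init_result); it is not the actual llama.cpp API.

// Minimal self-contained sketch of the ownership transfer behind .release().
// All types here are hypothetical stand-ins, not llama.cpp definitions.
#include <cstdio>
#include <memory>

struct fake_model   { ~fake_model()   { std::puts("model freed"); } };
struct fake_context { ~fake_context() { std::puts("context freed"); } };

// Assumed shape of the updated init result: unique_ptr members instead of
// raw pointers (the upstream change is assumed to look roughly like this).
struct fake_init_result {
    std::unique_ptr<fake_model>   model;
    std::unique_ptr<fake_context> context;
};

fake_init_result fake_init() {
    return { std::make_unique<fake_model>(), std::make_unique<fake_context>() };
}

int main() {
    // The server keeps raw pointers as long-lived members, so it must take
    // ownership out of the temporary result. Without .release(), the
    // unique_ptrs would destroy the objects as soon as `res` went out of
    // scope, leaving the raw pointers dangling.
    fake_model*   model = nullptr;
    fake_context* ctx   = nullptr;
    {
        fake_init_result res = fake_init();
        model = res.model.release();   // ownership moves to the raw pointer
        ctx   = res.context.release();
    } // nothing is freed here, because ownership was released above
    std::puts(model && ctx ? "pointers still valid" : "unexpected");
    // The caller is now responsible for freeing the objects explicitly.
    delete ctx;
    delete model;
}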