From f9e368b7c4a9604dbfebeb602d08a17d322d5805 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 23 Jan 2025 16:35:44 +0100 Subject: [PATCH 01/85] chore(refactor): group cpu cap detection (#4674) Signed-off-by: Ettore Di Giacinto --- pkg/model/initializers.go | 49 ++++++++++++++------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index d5f1459b..9fc0c18c 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -66,6 +66,17 @@ const ( LocalStoreBackend = "local-store" ) +var llamaCPPVariants = []string{ + LLamaCPPAVX2, + LLamaCPPAVX, + LLamaCPPFallback, + LLamaCPPCUDA, + LLamaCPPHipblas, + LLamaCPPSycl16, + LLamaCPPSycl32, + LLamaCPPGRPC, +} + func backendPath(assetDir, backend string) string { return filepath.Join(assetDir, "backend-assets", "grpc", backend) } @@ -107,40 +118,14 @@ ENTRY: if AutoDetect { // if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up // when starting the service - foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas, foundSycl16, foundSycl32 := false, false, false, false, false, false, false, false + foundVariants := map[string]bool{} if _, ok := backends[LLamaCPP]; !ok { for _, e := range entry { - if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2) - foundLCPPAVX2 = true - } - if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX) - foundLCPPAVX = true - } - if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback) - foundLCPPFallback = true - } - if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC) - foundLCPPGRPC = true - } - if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA) - foundLCPPCuda = true - } - if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas) - foundLCPPHipblas = true - } - if strings.Contains(e.Name(), LLamaCPPSycl16) && !foundSycl16 { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl16) - foundSycl16 = true - } - if strings.Contains(e.Name(), LLamaCPPSycl32) && !foundSycl32 { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPSycl32) - foundSycl32 = true + for _, v := range llamaCPPVariants { + if strings.Contains(e.Name(), v) && !foundVariants[v] { + backends[LLamaCPP] = append(backends[LLamaCPP], v) + foundVariants[v] = true + } } } } From 5177837ab045c3df2a6096baad1a01f63083b130 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 24 Jan 2025 08:26:44 +0100 Subject: [PATCH 02/85] chore: detect and enable avx512 builds (#4675) chore(avx512): add support Fixes https://github.com/mudler/LocalAI/issues/4662 Signed-off-by: Ettore Di Giacinto --- Dockerfile | 2 +- Makefile | 8 ++++++++ pkg/model/initializers.go | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 566e03bc..2f2bcafa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -303,7 +303,7 @@ RUN make prepare ## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build ## (both will use CUDA or hipblas for the 
actual computation) RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ - SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ + SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx512 backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ else \ make build; \ fi diff --git a/Makefile b/Makefile index 9c4f3778..e3c28039 100644 --- a/Makefile +++ b/Makefile @@ -186,6 +186,7 @@ endif ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2 +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc @@ -699,6 +700,13 @@ backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama. CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2 +backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp + cp -rf backend/cpp/llama backend/cpp/llama-avx512 + $(MAKE) -C backend/cpp/llama-avx512 purge + $(info ${GREEN}I llama-cpp build info:avx512${RESET}) + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512 + backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp cp -rf backend/cpp/llama backend/cpp/llama-avx $(MAKE) -C backend/cpp/llama-avx purge diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 9fc0c18c..ace72fa3 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -48,6 +48,7 @@ const ( LLamaCPP = "llama-cpp" LLamaCPPAVX2 = "llama-cpp-avx2" + LLamaCPPAVX512 = "llama-cpp-avx512" LLamaCPPAVX = "llama-cpp-avx" LLamaCPPFallback = "llama-cpp-fallback" LLamaCPPCUDA = "llama-cpp-cuda" @@ -68,6 +69,7 @@ const ( var llamaCPPVariants = []string{ LLamaCPPAVX2, + LLamaCPPAVX512, LLamaCPPAVX, LLamaCPPFallback, LLamaCPPCUDA, @@ -268,6 +270,12 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) selectedProcess = p } + } else if xsysinfo.HasCPUCaps(cpuid.AVX512F) { + p := backendPath(assetDir, LLamaCPPAVX512) + if _, err := os.Stat(p); err == nil { + log.Info().Msgf("[%s] attempting to load with AVX512 variant", backend) + selectedProcess = p + } } else if xsysinfo.HasCPUCaps(cpuid.AVX) { p := backendPath(assetDir, LLamaCPPAVX) if _, err := os.Stat(p); err == nil { From d1d7ce83d4195113b45d6f0d7dba79d321a86df4 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Fri, 24 Jan 2025 08:27:02 +0100 Subject: [PATCH 03/85] chore(model gallery): add MiniCPM-o-2.6-7.6b (#4676) Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4ce19bb4..d37f0ab4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5667,6 +5667,32 @@ - filename: marco-o1-uncensored.Q4_K_M.gguf sha256: 
ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9 uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "minicpm-o-2_6" + icon: https://avatars.githubusercontent.com/u/89920203 + urls: + - https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf + - https://huggingface.co/openbmb/MiniCPM-o-2_6 + description: | + MiniCPM-o 2.6 is the latest and most capable model in the MiniCPM-o series. The model is built in an end-to-end fashion based on SigLip-400M, Whisper-medium-300M, ChatTTS-200M, and Qwen2.5-7B with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - qwen2 + - cpu + overrides: + mmproj: minicpm-o-2_6-mmproj-f16.gguf + parameters: + model: minicpm-o-2_6-Q4_K_M.gguf + files: + - filename: minicpm-o-2_6-Q4_K_M.gguf + sha256: 4f635fc0c0bb88d50ccd9cf1f1e5892b5cb085ff88fe0d8e1148fd9a8a836bc2 + uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/Model-7.6B-Q4_K_M.gguf + - filename: minicpm-o-2_6-mmproj-f16.gguf + sha256: efa4f7d96aa0f838f2023fc8d28e519179b16f1106777fa9280b32628191aa3e + uri: huggingface://openbmb/MiniCPM-o-2_6-gguf/mmproj-model-f16.gguf - !!merge <<: *qwen2 name: "minicpm-v-2_6" license: apache-2.0 From 82824145839bc4dd3dfad64519ac1151a03a260a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 24 Jan 2025 08:27:22 +0100 Subject: [PATCH 04/85] chore(downloader): support hf.co and hf:// URIs (#4677) Signed-off-by: Ettore Di Giacinto --- pkg/downloader/uri.go | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 2e0363c8..54b8eb10 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -21,14 +21,16 @@ import ( ) const ( - HuggingFacePrefix = "huggingface://" - OCIPrefix = "oci://" - OllamaPrefix = "ollama://" - HTTPPrefix = "http://" - HTTPSPrefix = "https://" - GithubURI = "github:" - GithubURI2 = "github://" - LocalPrefix = "file://" + HuggingFacePrefix = "huggingface://" + HuggingFacePrefix1 = "hf://" + HuggingFacePrefix2 = "hf.co/" + OCIPrefix = "oci://" + OllamaPrefix = "ollama://" + HTTPPrefix = "http://" + HTTPSPrefix = "https://" + GithubURI = "github:" + GithubURI2 = "github://" + LocalPrefix = "file://" ) type URI string @@ -127,6 +129,8 @@ func (u URI) LooksLikeURL() bool { return strings.HasPrefix(string(u), HTTPPrefix) || strings.HasPrefix(string(u), HTTPSPrefix) || strings.HasPrefix(string(u), HuggingFacePrefix) || + strings.HasPrefix(string(u), HuggingFacePrefix1) || + strings.HasPrefix(string(u), HuggingFacePrefix2) || strings.HasPrefix(string(u), GithubURI) || strings.HasPrefix(string(u), OllamaPrefix) || strings.HasPrefix(string(u), OCIPrefix) || @@ -170,8 +174,10 @@ func (s URI) ResolveURL() string { projectPath := strings.Join(repoPath[2:], "/") return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath) - case strings.HasPrefix(string(s), HuggingFacePrefix): + case strings.HasPrefix(string(s), HuggingFacePrefix) || strings.HasPrefix(string(s), HuggingFacePrefix1) || strings.HasPrefix(string(s), HuggingFacePrefix2): repository := strings.Replace(string(s), HuggingFacePrefix, "", 1) + repository = strings.Replace(repository, HuggingFacePrefix1, "", 1) + repository = strings.Replace(repository, HuggingFacePrefix2, "", 1) // convert repository to a full URL. // e.g. 
TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf owner := strings.Split(repository, "/")[0] From 66e9ef3f33b35b7c4879ddfe76f9223061b3a7f9 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Fri, 24 Jan 2025 08:28:44 +0100 Subject: [PATCH 05/85] chore(model gallery): add DeepSeek R1 14b, 32b and 70b (#4679) Signed-off-by: Gianluca Boiano --- gallery/index.yaml | 113 ++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 33 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index d37f0ab4..619f43b6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2696,39 +2696,6 @@ - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615 uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "deepseek-r1-distill-qwen-1.5b" - icon: "https://avatars.githubusercontent.com/u/148330874" - urls: - - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b - - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF - description: | - DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. - Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. - By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. - overrides: - parameters: - model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf - files: - - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf - sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe - uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf -- !!merge <<: *qwen25 - name: "deepseek-r1-distill-qwen-7b" - urls: - - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B - - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF - description: | - DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. - Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. - By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. 
- overrides: - parameters: - model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf - files: - - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf - sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b - uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf - &archfunct license: apache-2.0 tags: @@ -5334,6 +5301,86 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf +- &deepseek-r1 ## Start DeepSeek-R1 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "deepseek-r1-distill-qwen-1.5b" + icon: "https://avatars.githubusercontent.com/u/148330874" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF + description: | + DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks. + Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing. + By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks. + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf + sha256: 1741e5b2d062b07acf048bf0d2c514dadf2a48f94e2b4aa0cfe069af3838ee2f + uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "deepseek-r1-distill-qwen-7b" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf + sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b + uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "deepseek-r1-distill-qwen-14b" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf + sha256: 0b319bd0572f2730bfe11cc751defe82045fad5085b4e60591ac2cd2d9633181 + uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "deepseek-r1-distill-qwen-32b" + urls: + - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B + - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF + overrides: + parameters: + model: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf + files: + - filename: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf + sha256: bed9b0f551f5b95bf9da5888a48f0f87c37ad6b72519c4cbd775f54ac0b9fc62 + uri: 
huggingface://bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-llama-8b"
+  icon: "https://avatars.githubusercontent.com/u/148330874"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+      sha256: 87bcba20b4846d8dadf753d3ff48f9285d131fc95e3e0e7e934d4f20bc896f5d
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-llama-70b"
+  icon: "https://avatars.githubusercontent.com/u/148330874"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
+      sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf

From 9a1182fa01f8efcbf4193cf1edaabb908f864dd1 Mon Sep 17 00:00:00 2001
From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com>
Date: Fri, 24 Jan 2025 08:29:02 +0100
Subject: [PATCH 06/85] chore(model gallery): add flux.1, stablediffusion and
 whisper icons (#4680)

Signed-off-by: Gianluca Boiano
---
 gallery/index.yaml | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 619f43b6..15dbf1e2 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -11137,7 +11137,7 @@
       uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors
       sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
 - name: stable-diffusion-3-medium
-  icon: https://huggingface.co/leo009/stable-diffusion-3-medium/resolve/main/sd3demo.jpg
+  icon: https://avatars.githubusercontent.com/u/100950301
   license: other
   description: |
     Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
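The gallery entries added throughout this series lean on YAML anchors and merge keys: a base entry is declared once with an anchor such as `&deepseek-r1` (or `&qwen25`, `&llama33`), and each variant pulls in its defaults with `!!merge <<: *deepseek-r1`, restating only the keys that differ. A minimal sketch of the pattern follows; the `family-base` and `family-variant` names are hypothetical, chosen only for illustration:

- &family-base
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "family-base"
  description: |
    Defaults inherited by every entry that merges this anchor.
- !!merge <<: *family-base
  name: "family-variant"     # replaces the inherited name
  overrides:                 # keys set here win over the merged defaults
    parameters:
      model: family-variant.Q4_K_M.gguf

Keys set explicitly on the merging entry take precedence over the anchored ones, which is why each variant above only restates `name`, `urls`, `overrides`, and `files`.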
@@ -11152,6 +11152,7 @@ - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" - name: sd-1.5-ggml + icon: https://avatars.githubusercontent.com/u/37351293 license: creativeml-openrail-m url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master" description: | @@ -11185,7 +11186,7 @@ - stablediffusion - gpu - cpu - icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/media/main/sd3.5_medium_demo.jpg + icon: https://avatars.githubusercontent.com/u/100950301 overrides: options: - "clip_l_path:clip_l-Q4_0.gguf" @@ -11220,7 +11221,7 @@ - stablediffusion - gpu - cpu - icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png + icon: https://avatars.githubusercontent.com/u/100950301 overrides: parameters: model: sd3.5_large-Q4_0.gguf @@ -11239,6 +11240,7 @@ uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf - &flux name: flux.1-dev + icon: https://avatars.githubusercontent.com/u/164064024 license: flux-1-dev-non-commercial-license description: | FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post. @@ -11262,7 +11264,6 @@ - !!merge <<: *flux name: flux.1-schnell license: apache-2 - icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg description: | FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post. Key Features @@ -11295,7 +11296,6 @@ - flux - gpu - cpu - icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg overrides: parameters: model: flux1-dev-Q2_K.gguf @@ -11315,6 +11315,7 @@ - &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" + icon: https://avatars.githubusercontent.com/u/14957082 license: "MIT" urls: - https://github.com/ggerganov/whisper.cpp @@ -11492,6 +11493,7 @@ description: | Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp + icon: https://avatars.githubusercontent.com/u/100950301 - &piper ## Piper TTS url: github:mudler/LocalAI/gallery/piper.yaml@master name: voice-en-us-kathleen-low @@ -12072,6 +12074,7 @@ uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-zh_CN-huayan-medium.tar.gz sha256: 0299a5e7f481ba853404e9f0e1515a94d5409585d76963fa4d30c64bd630aa99 - name: "silero-vad" + icon: https://github.com/snakers4/silero-models/raw/master/files/silero_logo.jpg url: github:mudler/LocalAI/gallery/virtual.yaml@master urls: - https://github.com/snakers4/silero-vad @@ -12091,6 +12094,7 @@ uri: https://huggingface.co/onnx-community/silero-vad/resolve/main/onnx/model.onnx sha256: a4a068cd6cf1ea8355b84327595838ca748ec29a25bc91fc82e6c299ccdc5808 - name: "bark-cpp-small" + icon: https://avatars.githubusercontent.com/u/99442120 url: github:mudler/LocalAI/gallery/virtual.yaml@master license: mit urls: From 4d44ebc2f2f261deaf20699d68c22a1ba18e7054 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 24 Jan 2025 10:18:22 +0100 Subject: [PATCH 07/85] chore(deps): bump grpcio to 1.70.0 (#4682) Signed-off-by: Ettore Di Giacinto --- backend/python/autogptq/requirements.txt | 2 +- backend/python/bark/requirements.txt | 2 +- backend/python/common/template/requirements.txt | 2 +- backend/python/coqui/requirements.txt | 2 +- backend/python/diffusers/requirements.txt | 2 +- 
backend/python/exllama2/requirements.txt | 2 +- backend/python/faster-whisper/requirements.txt | 2 +- backend/python/kokoro/requirements.txt | 2 +- backend/python/rerankers/requirements.txt | 2 +- backend/python/transformers/requirements.txt | 2 +- backend/python/vllm/requirements.txt | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index c857a867..af596d9e 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 81c1273d..f4beaec1 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,4 +1,4 @@ bark==0.1.5 -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 0f43df10..125b18dd 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf grpcio-tools \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index 76c9ba4b..5ec13b5f 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi packaging==24.1 \ No newline at end of file diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index d49155ed..8c450dca 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,5 +1,5 @@ setuptools -grpcio==1.69.0 +grpcio==1.70.0 pillow protobuf certifi diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 77464406..cb622d0c 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi wheel diff --git a/backend/python/faster-whisper/requirements.txt b/backend/python/faster-whisper/requirements.txt index 0f43df10..125b18dd 100644 --- a/backend/python/faster-whisper/requirements.txt +++ b/backend/python/faster-whisper/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf grpcio-tools \ No newline at end of file diff --git a/backend/python/kokoro/requirements.txt b/backend/python/kokoro/requirements.txt index 75d65ba1..06e60389 100644 --- a/backend/python/kokoro/requirements.txt +++ b/backend/python/kokoro/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf phonemizer scipy diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index afc8b2a9..566fdae0 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,3 +1,3 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index db41b928..c0fa0c0b 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi 
setuptools diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index a1eea776..1f92add8 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.69.0 +grpcio==1.70.0 protobuf certifi setuptools \ No newline at end of file From 9409c99738f32921255878af7c7b98db6e427b11 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 24 Jan 2025 22:45:54 +0100 Subject: [PATCH 08/85] chore: :arrow_up: Update ggerganov/llama.cpp to `c5d9effb49649db80a52caf5c0626de6f342f526` (#4685) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e3c28039..0e4dd391 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f +CPPLLAMA_VERSION?=c5d9effb49649db80a52caf5c0626de6f342f526 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From e9cace137b52b37b32c6284ce842b432bd4e21c3 Mon Sep 17 00:00:00 2001 From: Gianluca Boiano <491117+M0Rf30@users.noreply.github.com> Date: Sat, 25 Jan 2025 09:04:38 +0100 Subject: [PATCH 09/85] chore(model gallery): update deepseek-r1 prompt template (#4686) Signed-off-by: Gianluca Boiano --- gallery/deepseek-r1.yaml | 23 +++++++++++++++++++++++ gallery/index.yaml | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 gallery/deepseek-r1.yaml diff --git a/gallery/deepseek-r1.yaml b/gallery/deepseek-r1.yaml new file mode 100644 index 00000000..29ca9db1 --- /dev/null +++ b/gallery/deepseek-r1.yaml @@ -0,0 +1,23 @@ +--- +name: "deepseek-r1" + +config_file: | + context_size: 131072 + mmap: true + f16: true + stopwords: + - <|begin▁of▁sentence|> + - <|end▁of▁sentence|> + - <|User|> + - <|Assistant|> + template: + chat_message: | + {{if eq .RoleName "system" -}}{{.Content }} + {{ end -}} + {{if eq .RoleName "user" -}}<|User|>{{.Content}} + {{end -}} + {{if eq .RoleName "assistant" -}}<|Assistant|>{{.Content}}<|end▁of▁sentence|>{{end}} + completion: | + {{.Input}} + chat: | + {{.Input -}}<|Assistant|> diff --git a/gallery/index.yaml b/gallery/index.yaml index 15dbf1e2..11e48fa5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5302,7 +5302,7 @@ sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf - &deepseek-r1 ## Start DeepSeek-R1 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + url: "github:mudler/LocalAI/gallery/deepseek-r1.yaml@master" name: "deepseek-r1-distill-qwen-1.5b" icon: "https://avatars.githubusercontent.com/u/148330874" urls: From 8eef5a2c5ef85a045a10b7255520a7ca4fd9df81 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 Jan 2025 11:04:12 +0100 Subject: [PATCH 10/85] chore(model gallery): add lamarck-14b-v0.7 (#4687) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 11e48fa5..80a60dee 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3274,6 
+3274,21 @@ - filename: DRT-o1-14B-Q4_K_M.gguf sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328 uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "lamarck-14b-v0.7" + icon: https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7/resolve/main/LamarckShades.webp + urls: + - https://huggingface.co/sometimesanotion/Lamarck-14B-v0.7 + - https://huggingface.co/bartowski/Lamarck-14B-v0.7-GGUF + description: | + Lamarck 14B v0.7: A generalist merge with emphasis on multi-step reasoning, prose, and multi-language ability. The 14B parameter model class has a lot of strong performers, and Lamarck strives to be well-rounded and solid. + overrides: + parameters: + model: Lamarck-14B-v0.7-Q4_K_M.gguf + files: + - filename: Lamarck-14B-v0.7-Q4_K_M.gguf + sha256: ff8eba82b77a4c6b6d556b85629414655d881f8af4601bcf891c6a7b0345b442 + uri: huggingface://bartowski/Lamarck-14B-v0.7-GGUF/Lamarck-14B-v0.7-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" From 901b06284adaddddbf2cbbc58fd490080950b6a0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 Jan 2025 11:06:05 +0100 Subject: [PATCH 11/85] chore(model gallery): add art-v0-3b (#4688) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 80a60dee..cc96f770 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3289,6 +3289,22 @@ - filename: Lamarck-14B-v0.7-Q4_K_M.gguf sha256: ff8eba82b77a4c6b6d556b85629414655d881f8af4601bcf891c6a7b0345b442 uri: huggingface://bartowski/Lamarck-14B-v0.7-GGUF/Lamarck-14B-v0.7-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "art-v0-3b" + icon: https://blog.agi-0.com/_next/image?url=%2Fabout_img2.jpeg&w=1920&q=75 + urls: + - https://huggingface.co/AGI-0/Art-v0-3B + - https://huggingface.co/bartowski/Art-v0-3B-GGUF + - https://blog.agi-0.com/posts/art-series + description: | + Art v0 3B is our inaugural model in the Art series, fine-tuned from Qwen/Qwen2.5-3B-Instruct using a specialized dataset generated with Gemini 2.0 Flash Thinking. 
Read more about the Art series + overrides: + parameters: + model: Art-v0-3B-Q4_K_M.gguf + files: + - filename: Art-v0-3B-Q4_K_M.gguf + sha256: 551acd326ce9a743b6e06e094865eb2f06c23c81c812ce221d757bf27ceec9f7 + uri: huggingface://bartowski/Art-v0-3B-GGUF/Art-v0-3B-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" From 4c3710a5319269fa159c8521dd74a13fe3be11c7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 Jan 2025 11:07:31 +0100 Subject: [PATCH 12/85] chore(model gallery): add chuluun-qwen2.5-72b-v0.08 (#4689) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cc96f770..12f1bc2e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3305,6 +3305,24 @@ - filename: Art-v0-3B-Q4_K_M.gguf sha256: 551acd326ce9a743b6e06e094865eb2f06c23c81c812ce221d757bf27ceec9f7 uri: huggingface://bartowski/Art-v0-3B-GGUF/Art-v0-3B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "chuluun-qwen2.5-72b-v0.08" + icon: https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08/resolve/main/Chuluun8-2.png + urls: + - https://huggingface.co/DatToad/Chuluun-Qwen2.5-72B-v0.08 + - https://huggingface.co/bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF + description: | + This is a merge of pre-trained language models created using mergekit. + I re-ran the original Chuluun formula including the newly released Ink from Allura-Org. I've found the addition gives the model a lot more variability, likely because of aggressive de-slop applied to its dataset. Sometimes this means a word choice will be strange and you'll want to manually edit when needed, but it means you'll see less ministrations sparkling with mischief. + Because of this the best way to approach the model is to run multiple regens and choose the one you like, edit mercilessly, and continue. Like the original Chuluun this variant is very steerable for complex storywriting and RP. It's probably also a little spicier than v0.01 with both Magnum and whatever the heck Fizz threw into the data for Ink. + I've also been hearing praise for a level of character intelligence not seen in other models, including Largestral finetunes and merges. I'm not about to say any model of mine is smarter because it was a dumb idea to use Tess as the base and it somehow worked. 
+ overrides: + parameters: + model: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf + files: + - filename: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf + sha256: 0fec82625f74a9a340837de7af287b1d9042e5aeb70cda2621426db99958b0af + uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF/Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf - &smollm ## SmolLM url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "smollm-1.7b-instruct" From 4ab107bc1ae1323f80dcad8b13fbefd943a067cb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 25 Jan 2025 22:44:14 +0100 Subject: [PATCH 13/85] chore: :arrow_up: Update ggerganov/llama.cpp to `26771a1491f3a4c3d5b99c4c267b81aca9a7dfa0` (#4690) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0e4dd391..f6ee9a08 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=c5d9effb49649db80a52caf5c0626de6f342f526 +CPPLLAMA_VERSION?=26771a1491f3a4c3d5b99c4c267b81aca9a7dfa0 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From a6bc8aa7c7583a989b0e86ea113e7d66900ee760 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 Jan 2025 10:01:37 +0100 Subject: [PATCH 14/85] chore(model gallery): add l3.3-nevoria-r1-70b (#4691) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 12f1bc2e..51f36da9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -456,6 +456,25 @@ - filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "l3.3-nevoria-r1-70b" + icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/_oWpsvCZ-graNKzJBBjGo.jpeg + urls: + - https://huggingface.co/Steelskull/L3.3-Nevoria-R1-70b + - https://huggingface.co/bartowski/L3.3-Nevoria-R1-70b-GGUF + description: | + This model builds upon the original Nevoria foundation, incorporating the Deepseek-R1 reasoning architecture to enhance dialogue interaction and scene comprehension. While maintaining Nevoria's core strengths in storytelling and scene description (derived from EVA, EURYALE, and Anubis), this iteration aims to improve prompt adherence and creative reasoning capabilities. The model also retains the balanced perspective introduced by Negative_LLAMA and Nemotron elements. Also, the model plays the card to almost a fault, It'll pick up on minor issues and attempt to run with them. Users had it call them out for misspelling a word while playing in character. + + Note: While Nevoria-R1 represents a significant architectural change, rather than a direct successor to Nevoria, it operates as a distinct model with its own characteristics. + + The lorablated model base choice was intentional, creating unique weight interactions similar to the original Astoria model and Astoria V2 model. 
This "weight twisting" effect, achieved by subtracting the lorablated base model during merging, creates an interesting balance in the model's behavior. While unconventional compared to sequential component application, this approach was chosen for its unique response characteristics. + overrides: + parameters: + model: L3.3-Nevoria-R1-70b-Q4_K_M.gguf + files: + - filename: L3.3-Nevoria-R1-70b-Q4_K_M.gguf + sha256: 9f32f202fb5b1465c942693bb11eea9e8a1c5686b00602715b495c068eaf1c58 + uri: huggingface://bartowski/L3.3-Nevoria-R1-70b-GGUF/L3.3-Nevoria-R1-70b-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 8f5aa2d9deeb4817e950c753c90bdf38738cf681 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 Jan 2025 10:03:46 +0100 Subject: [PATCH 15/85] chore(model gallery): add dumpling-qwen2.5-32b (#4692) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 51f36da9..f4ce6f6d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3400,6 +3400,33 @@ - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "dumpling-qwen2.5-32b" + icon: https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B/resolve/main/dumpling_cover.png?download=true + urls: + - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B + - https://huggingface.co/bartowski/Dumpling-Qwen2.5-32B-GGUF + description: | + nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on: + nbeerbower/GreatFirewall-DPO + nbeerbower/Schule-DPO + nbeerbower/Purpura-DPO + nbeerbower/Arkhaios-DPO + jondurbin/truthy-dpo-v0.1 + antiven0m/physical-reasoning-dpo + flammenai/Date-DPO-NoAsterisks + flammenai/Prude-Phi3-DPO + Atsunori/HelpSteer2-DPO + jondurbin/gutenberg-dpo-v0.1 + nbeerbower/gutenberg2-dpo + nbeerbower/gutenberg-moderne-dpo. 
+ overrides: + parameters: + model: Dumpling-Qwen2.5-32B-Q4_K_M.gguf + files: + - filename: Dumpling-Qwen2.5-32B-Q4_K_M.gguf + sha256: c5b7d773cc614650ad3956008e30d0607df6106c28e381870a9b950bd4ee1d17 + uri: huggingface://bartowski/Dumpling-Qwen2.5-32B-GGUF/Dumpling-Qwen2.5-32B-Q4_K_M.gguf - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 From 3b6b37a81bb6224edd77276efbd661a3b2dc337e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 Jan 2025 10:06:06 +0100 Subject: [PATCH 16/85] chore(model gallery): add deepseek-r1-qwen-2.5-32b-ablated (#4693) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f4ce6f6d..da601b35 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5476,6 +5476,25 @@ - filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8 uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "deepseek-r1-qwen-2.5-32b-ablated" + icon: https://cdn-uploads.huggingface.co/production/uploads/6587d8dd1b44d0e694104fbf/0dkt6EhZYwXVBxvSWXdaM.png + urls: + - https://huggingface.co/NaniDAO/deepseek-r1-qwen-2.5-32B-ablated + - https://huggingface.co/bartowski/deepseek-r1-qwen-2.5-32B-ablated-GGUF + description: | + DeepSeek-R1-Distill-Qwen-32B with ablation technique applied for a more helpful (and based) reasoning model. + + This means it will refuse less of your valid requests for an uncensored UX. Use responsibly and use common sense. + + We do not take any responsibility for how you apply this intelligence, just as we do not for how you apply your own. 
+ overrides: + parameters: + model: deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf + files: + - filename: deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf + sha256: 7f33898641ebe58fe178c3517efc129f4fe37c6ca2d8b91353c4539b0c3411ec + uri: huggingface://bartowski/deepseek-r1-qwen-2.5-32B-ablated-GGUF/deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf - &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" From 4db8f5cbced8031aa1536b5c4fb906429899477c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 Jan 2025 22:44:54 +0100 Subject: [PATCH 17/85] chore: :arrow_up: Update ggerganov/llama.cpp to `178a7eb952d211b8d4232d5e50ae1b64519172a9` (#4694) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f6ee9a08..f960194c 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=26771a1491f3a4c3d5b99c4c267b81aca9a7dfa0 +CPPLLAMA_VERSION?=178a7eb952d211b8d4232d5e50ae1b64519172a9 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 5cf838c08d304844f78f26098956249c1d132c49 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 Jan 2025 09:26:00 +0100 Subject: [PATCH 18/85] chore(model gallery): add confucius-o1-14b (#4696) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index da601b35..d736ec35 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3427,6 +3427,20 @@ - filename: Dumpling-Qwen2.5-32B-Q4_K_M.gguf sha256: c5b7d773cc614650ad3956008e30d0607df6106c28e381870a9b950bd4ee1d17 uri: huggingface://bartowski/Dumpling-Qwen2.5-32B-GGUF/Dumpling-Qwen2.5-32B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "confucius-o1-14b" + urls: + - https://huggingface.co/netease-youdao/Confucius-o1-14B + - https://huggingface.co/bartowski/Confucius-o1-14B-GGUF + description: | + Confucius-o1-14B is a o1-like reasoning model developed by the NetEase Youdao Team, it can be easily deployed on a single GPU without quantization. This model is based on the Qwen2.5-14B-Instruct model and adopts a two-stage learning strategy, enabling the lightweight 14B model to possess thinking abilities similar to those of o1. What sets it apart is that after generating the chain of thought, it can summarize a step-by-step problem-solving process from the chain of thought on its own. This can prevent users from getting bogged down in the complex chain of thought and allows them to easily obtain the correct problem-solving ideas and answers. 
+ overrides: + parameters: + model: Confucius-o1-14B-Q4_K_M.gguf + files: + - filename: Confucius-o1-14B-Q4_K_M.gguf + sha256: 03182920edd8667db7d2a362ca2d25e88f4b615b383b5a55c764f4715fb22dd9 + uri: huggingface://bartowski/Confucius-o1-14B-GGUF/Confucius-o1-14B-Q4_K_M.gguf - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 From 26d790a2b6f1ee7ef276238f2475c282444d2e80 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 Jan 2025 09:28:29 +0100 Subject: [PATCH 19/85] chore(model gallery): add fuseo1-deepseekr1-qwen2.5-coder-32b-preview-v0.1 (#4697) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d736ec35..d1b5b822 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5509,6 +5509,20 @@ - filename: deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf sha256: 7f33898641ebe58fe178c3517efc129f4fe37c6ca2d8b91353c4539b0c3411ec uri: huggingface://bartowski/deepseek-r1-qwen-2.5-32B-ablated-GGUF/deepseek-r1-qwen-2.5-32B-ablated-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "fuseo1-deepseekr1-qwen2.5-coder-32b-preview-v0.1" + urls: + - https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview + - https://huggingface.co/bartowski/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-GGUF + description: | + FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains. 
+ overrides: + parameters: + model: FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf + files: + - filename: FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf + sha256: d7753547046cd6e3d45a2cfbd5557aa20dd0b9f0330931d3fd5b3d4a0b468b24 + uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf - &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" From e7cffd7afafdf46a3995019bdb8c587881796e68 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 Jan 2025 09:31:47 +0100 Subject: [PATCH 20/85] chore(model gallery): add fuseo1-deepseekr1-qwen2.5-instruct-32b-preview (#4698) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d1b5b822..5cf627f5 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5523,6 +5523,20 @@ - filename: FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf sha256: d7753547046cd6e3d45a2cfbd5557aa20dd0b9f0330931d3fd5b3d4a0b468b24 uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Coder-32B-Preview-v0.1-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "fuseo1-deepseekr1-qwen2.5-instruct-32b-preview" + urls: + - https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview + - https://huggingface.co/bartowski/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-GGUF + description: | + FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains. 
+ overrides: + parameters: + model: FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf + files: + - filename: FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf + sha256: 3b06a004a6bb827f809a7326b30ee73f96a1a86742d8c2dd335d75874fa17aa4 + uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf - &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" From 0f4f62cf3cdbc34c99a69a83f97a74f9913b64f2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 Jan 2025 09:51:06 +0100 Subject: [PATCH 21/85] chore(model gallery): add fuseo1-deepseekr1-qwq-32b-preview (#4699) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5cf627f5..5e081b98 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5537,6 +5537,20 @@ - filename: FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf sha256: 3b06a004a6bb827f809a7326b30ee73f96a1a86742d8c2dd335d75874fa17aa4 uri: huggingface://bartowski/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-GGUF/FuseO1-DeepSeekR1-Qwen2.5-Instruct-32B-Preview-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "fuseo1-deepseekr1-qwq-32b-preview" + urls: + - https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-32B-Preview + - https://huggingface.co/bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF + description: | + FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains. + overrides: + parameters: + model: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf + files: + - filename: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf + sha256: 16f1fb6bf76bb971a7a63e1a68cddd09421f4a767b86eec55eed1e08178f78f2 + uri: huggingface://bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF/FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf - &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "qwen2-7b-instruct" From 539e94db731badf8878c23a71b00d3b02dacaf7e Mon Sep 17 00:00:00 2001 From: Maximilian Kenfenheuer Date: Mon, 27 Jan 2025 16:53:05 +0100 Subject: [PATCH 22/85] feat: function argument parsing using named regex (#4700) Signed-off-by: Maximilian Kenfenheuer --- pkg/functions/parse.go | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index f5593690..7b8df91e 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -5,6 +5,7 @@ import ( "errors" "io" "regexp" + "slices" "strings" "github.com/mudler/LocalAI/pkg/functions/grammars" @@ -71,6 +72,12 @@ type FunctionsConfig struct { // JSONRegexMatch is a regex to extract the JSON object from the response JSONRegexMatch []string `yaml:"json_regex_match"` + // ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments. 
+ ArgumentRegex []string `yaml:"argument_regex"` + // ArgumentRegex named regex names for key and value extractions. default: key and value + ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key + ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value + // ReplaceFunctionResults allow to replace strings in the results before parsing them ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"` @@ -310,7 +317,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC if functionName == "" { return results } - results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result[functionArgumentsKey]}) + results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: ParseFunctionCallArgs(result[functionArgumentsKey], functionConfig)}) } } } else { @@ -322,3 +329,39 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC return results } + +func ParseFunctionCallArgs(functionArguments string, functionConfig FunctionsConfig) string { + if len(functionConfig.ArgumentRegex) > 0 { + // We use named regexes here to extract the function argument key value pairs and convert this to valid json. + // TODO: there might be responses where an object as a value is expected/required. This is currently not handled. + args := make(map[string]string) + + agrsRegexKeyName := "key" + agrsRegexValueName := "value" + + if functionConfig.ArgumentRegexKey != "" { + agrsRegexKeyName = functionConfig.ArgumentRegexKey + } + if functionConfig.ArgumentRegexValue != "" { + agrsRegexValueName = functionConfig.ArgumentRegexValue + } + + for _, r := range functionConfig.ArgumentRegex { + var respRegex = regexp.MustCompile(r) + var nameRange []string = respRegex.SubexpNames() + var keyIndex = slices.Index(nameRange, agrsRegexKeyName) + var valueIndex = slices.Index(nameRange, agrsRegexValueName) + matches := respRegex.FindAllStringSubmatch(functionArguments, -1) + for _, match := range matches { + args[match[keyIndex]] = match[valueIndex] + } + } + + jsonBytes, _ := json.Marshal(args) + jsonString := string(jsonBytes) + + return jsonString + } else { + return functionArguments + } +} From fff35d5528a573935cad76489974d39b8cebfff3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 21:09:50 +0000 Subject: [PATCH 23/85] chore(deps): Bump sentence-transformers from 3.3.1 to 3.4.0 in /backend/python/transformers (#4702) chore(deps): Bump sentence-transformers in /backend/python/transformers Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.3.1 to 3.4.0. - [Release notes](https://github.com/UKPLab/sentence-transformers/releases) - [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.3.1...v3.4.0) --- updated-dependencies: - dependency-name: sentence-transformers dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers/requirements-cpu.txt | 2 +- backend/python/transformers/requirements-cublas11.txt | 2 +- backend/python/transformers/requirements-cublas12.txt | 2 +- backend/python/transformers/requirements-hipblas.txt | 2 +- backend/python/transformers/requirements-intel.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index c88508e3..36dc973a 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -5,4 +5,4 @@ accelerate transformers bitsandbytes outetts -sentence-transformers==3.3.1 \ No newline at end of file +sentence-transformers==3.4.0 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 0faa9cec..a8b1c0c0 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -6,4 +6,4 @@ accelerate transformers bitsandbytes outetts -sentence-transformers==3.3.1 +sentence-transformers==3.4.0 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index 1e22312f..a54c4c88 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -5,4 +5,4 @@ numba==0.60.0 transformers bitsandbytes outetts -sentence-transformers==3.3.1 +sentence-transformers==3.4.0 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 47aa88db..73b7d85b 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -7,4 +7,4 @@ numba==0.60.0 bitsandbytes outetts bitsandbytes -sentence-transformers==3.3.1 +sentence-transformers==3.4.0 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 708b0516..5b677199 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -8,4 +8,4 @@ numba==0.60.0 intel-extension-for-transformers bitsandbytes outetts -sentence-transformers==3.3.1 +sentence-transformers==3.4.0 From 03f3df9a82dd8452abc9bae93f3b7cfb3063e322 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 09:13:00 +0100 Subject: [PATCH 24/85] chore(deps): Bump docs/themes/hugo-theme-relearn from `8dad5ee` to `5bcb9fe` (#4704) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `8dad5ee` to `5bcb9fe`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/8dad5ee419e5bb2a0b380aa72d7a7389af4945f6...5bcb9fe5e61d2fbe702034a24425992fd2455b0a) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index 8dad5ee4..5bcb9fe5 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit 8dad5ee419e5bb2a0b380aa72d7a7389af4945f6 +Subproject commit 5bcb9fe5e61d2fbe702034a24425992fd2455b0a From 3d0fbcb4f7331d1d7abca50d9719ea7e232cbdb3 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 Jan 2025 09:13:43 +0100 Subject: [PATCH 25/85] chore: :arrow_up: Update ggerganov/llama.cpp to `a4417ddda98fd0558fb4d802253e68a933704b59` (#4705) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f960194c..08c334a3 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=178a7eb952d211b8d4232d5e50ae1b64519172a9 +CPPLLAMA_VERSION?=a4417ddda98fd0558fb4d802253e68a933704b59 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From d9204ea3b5b0edbfb1e980fa559a7fa79ac8f1ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 11:50:09 +0100 Subject: [PATCH 26/85] chore(deps): Bump dependabot/fetch-metadata from 2.2.0 to 2.3.0 (#4701) Bumps [dependabot/fetch-metadata](https://github.com/dependabot/fetch-metadata) from 2.2.0 to 2.3.0. - [Release notes](https://github.com/dependabot/fetch-metadata/releases) - [Commits](https://github.com/dependabot/fetch-metadata/compare/v2.2.0...v2.3.0) --- updated-dependencies: - dependency-name: dependabot/fetch-metadata dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dependabot_auto.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependabot_auto.yml b/.github/workflows/dependabot_auto.yml index 951e65e1..5bcd84f6 100644 --- a/.github/workflows/dependabot_auto.yml +++ b/.github/workflows/dependabot_auto.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Dependabot metadata id: metadata - uses: dependabot/fetch-metadata@v2.2.0 + uses: dependabot/fetch-metadata@v2.3.0 with: github-token: "${{ secrets.GITHUB_TOKEN }}" skip-commit-verification: true From 91e1ff5a95d60fa3a8df250d953640f522adb251 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 Jan 2025 22:45:14 +0100 Subject: [PATCH 27/85] chore: :arrow_up: Update ggerganov/llama.cpp to `cae9fb4361138b937464524eed907328731b81f6` (#4711) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 08c334a3..6cbc7326 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a4417ddda98fd0558fb4d802253e68a933704b59 +CPPLLAMA_VERSION?=cae9fb4361138b937464524eed907328731b81f6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From b4b67e00bd7b705b1f6497f953ae562d0ea3af64 Mon Sep 17 00:00:00 2001 From: Maximilian Kenfenheuer Date: Tue, 28 Jan 2025 22:58:02 +0100 Subject: [PATCH 28/85] refactor: function argument parsing using named regex (#4708) Signed-off-by: Maximilian Kenfenheuer --- pkg/functions/parse.go | 61 +++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 7b8df91e..50cbb27b 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -331,37 +331,36 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC } func ParseFunctionCallArgs(functionArguments string, functionConfig FunctionsConfig) string { - if len(functionConfig.ArgumentRegex) > 0 { - // We use named regexes here to extract the function argument key value pairs and convert this to valid json. - // TODO: there might be responses where an object as a value is expected/required. This is currently not handled. 
- args := make(map[string]string)
-
- agrsRegexKeyName := "key"
- agrsRegexValueName := "value"
-
- if functionConfig.ArgumentRegexKey != "" {
- agrsRegexKeyName = functionConfig.ArgumentRegexKey
- }
- if functionConfig.ArgumentRegexValue != "" {
- agrsRegexValueName = functionConfig.ArgumentRegexValue
- }
-
- for _, r := range functionConfig.ArgumentRegex {
- var respRegex = regexp.MustCompile(r)
- var nameRange []string = respRegex.SubexpNames()
- var keyIndex = slices.Index(nameRange, agrsRegexKeyName)
- var valueIndex = slices.Index(nameRange, agrsRegexValueName)
- matches := respRegex.FindAllStringSubmatch(functionArguments, -1)
- for _, match := range matches {
- args[match[keyIndex]] = match[valueIndex]
- }
- }
-
- jsonBytes, _ := json.Marshal(args)
- jsonString := string(jsonBytes)
-
- return jsonString
- } else {
+ if len(functionConfig.ArgumentRegex) == 0 {
 return functionArguments
 }
+
+ // We use named regexes here to extract the function argument key value pairs and convert this to valid json.
+ // TODO: there might be responses where an object as a value is expected/required. This is currently not handled.
+ args := make(map[string]string)
+
+ agrsRegexKeyName := "key"
+ agrsRegexValueName := "value"
+
+ if functionConfig.ArgumentRegexKey != "" {
+ agrsRegexKeyName = functionConfig.ArgumentRegexKey
+ }
+ if functionConfig.ArgumentRegexValue != "" {
+ agrsRegexValueName = functionConfig.ArgumentRegexValue
+ }
+
+ for _, r := range functionConfig.ArgumentRegex {
+ var respRegex = regexp.MustCompile(r)
+ var nameRange []string = respRegex.SubexpNames()
+ var keyIndex = slices.Index(nameRange, agrsRegexKeyName)
+ var valueIndex = slices.Index(nameRange, agrsRegexValueName)
+ matches := respRegex.FindAllStringSubmatch(functionArguments, -1)
+ for _, match := range matches {
+ args[match[keyIndex]] = match[valueIndex]
+ }
+ }
+
+ jsonBytes, _ := json.Marshal(args)
+
+ return string(jsonBytes)
 }
From a37b2c765c2085c5d89cdfffda63e5ca671b4465 Mon Sep 17 00:00:00 2001
From: Maximilian Kenfenheuer
Date: Tue, 28 Jan 2025 22:58:35 +0100
Subject: [PATCH 29/85] docs: update advanced-usage.md to reflect changes in #4700 (#4709)

Signed-off-by: Maximilian Kenfenheuer
---
 docs/content/docs/advanced/advanced-usage.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index dd9894ef..62c19aba 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -148,6 +148,9 @@ function:
 no_action_function_name: "" # Function name to call when no action is determined.
 no_action_description_name: "" # Description name for no-action functions.
 response_regex: [] # Regular expressions to match response from
+ argument_regex: [] # Named regular expressions to extract function arguments from the response.
+ argument_regex_key_name: "key" # Name of the named regex capture to capture the key of the function arguments
+ argument_regex_value_name: "value" # Name of the named regex capture to capture the value of the function arguments
 json_regex_match: [] # Regular expressions to match JSON data when in tool mode
 replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
 replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
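To see the named-regex extraction from patches 28 and 29 end to end, here is a minimal, self-contained Go sketch of the same technique. It is not part of the patch series: the regex, the input string, and the printed output are illustrative assumptions, and it uses the default "key"/"value" capture-group names that apply when argument_regex_key_name and argument_regex_value_name are left unset.

package main

import (
	"encoding/json"
	"fmt"
	"regexp"
	"slices"
)

func main() {
	// Hypothetical raw argument string, as a model might emit it inside a
	// bracketed call such as [get_weather(city="Rome",unit="celsius")].
	functionArguments := `city="Rome",unit="celsius"`

	// One argument_regex entry with the default "key"/"value" group names.
	respRegex := regexp.MustCompile(`(?P<key>\w+)="(?P<value>[^"]*)"`)

	// Locate the indices of the named capture groups, as ParseFunctionCallArgs does.
	nameRange := respRegex.SubexpNames()
	keyIndex := slices.Index(nameRange, "key")
	valueIndex := slices.Index(nameRange, "value")

	// Collect every key/value pair the regex matches.
	args := map[string]string{}
	for _, match := range respRegex.FindAllStringSubmatch(functionArguments, -1) {
		args[match[keyIndex]] = match[valueIndex]
	}

	// Convert the extracted pairs into the JSON arguments object expected downstream.
	jsonBytes, err := json.Marshal(args)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(jsonBytes)) // prints {"city":"Rome","unit":"celsius"}
}

Run with `go run`, this prints {"city":"Rome","unit":"celsius"}; the patched ParseFunctionCallArgs additionally returns the raw string unchanged when no argument_regex is configured.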
From 1f4e66d63816efe9ed2c917f8c119a5289b8d01d Mon Sep 17 00:00:00 2001
From: Maximilian Kenfenheuer
Date: Wed, 29 Jan 2025 10:19:48 +0100
Subject: [PATCH 30/85] chore(model gallery): add specific message templates for llama3.2 based models (#4707)

* chore(model gallery): add specific message templates for llama3.2 based models

Signed-off-by: Maximilian Kenfenheuer

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer

* fix: yaml lint in llama3.2-quantized.yaml

Signed-off-by: Maximilian Kenfenheuer

---------

Signed-off-by: Maximilian Kenfenheuer
---
 gallery/index.yaml | 2 +-
 gallery/llama3.2-quantized.yaml | 55 +++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 gallery/llama3.2-quantized.yaml

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 5e081b98..1716f2b1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -839,7 +839,7 @@
 sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
 uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
- &llama32 ## llama3.2
- url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
+ url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master"
 icon: https://avatars.githubusercontent.com/u/153379578
 license: llama3.2
 description: |
diff --git a/gallery/llama3.2-quantized.yaml b/gallery/llama3.2-quantized.yaml
new file mode 100644
index 00000000..7e1d2630
--- /dev/null
+++ b/gallery/llama3.2-quantized.yaml
@@ -0,0 +1,55 @@
+---
+name: "llama3.2-quantized"
+
+config_file: |
+ mmap: true
+ function:
+ disable_no_action: true
+ grammar:
+ disable: true
+ response_regex:
+ - \[(?P<function>\w+)\((?P<arguments>.*)\)\]
+ argument_regex:
+ - (?P<key>[^ '\(=,]+)[='"]+(?P<value>[^=,"']+)['"]?
+ template:
+ chat: |
+ <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+ You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
+ {{.Input }}
+ <|start_header_id|>assistant<|end_header_id|>
+ chat_message: |
+ <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+ {{ if .FunctionCall -}}
+ {{ else if eq .RoleName "tool" -}}
+ The Function was executed and the response was:
+ {{ end -}}
+ {{ if .Content -}}
+ {{.Content -}}
+ {{ else if .FunctionCall -}}
+ {{ range .FunctionCall }}
+ [{{.FunctionCall.Name}}({{.FunctionCall.Arguments}})]
+ {{ end }}
+ {{ end -}}
+ <|eot_id|>
+ completion: |
+ {{.Input}}
+ function: |
+ <|start_header_id|>system<|end_header_id|>
+ You are an expert in composing functions. You are given a question and a set of possible functions.
+ Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
+ If none of the functions can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
+ If you decide to invoke any of the function(s), you MUST put it in the format as follows:
+ [func_name1(params_name1=params_value1,params_name2=params_value2,...),func_name2(params_name1=params_value1,params_name2=params_value2,...)]
+ You SHOULD NOT include any other text in the response.
+ Here is a list of functions in JSON format that you can invoke.
+ {{toJson .Functions}}
+ <|eot_id|><|start_header_id|>user<|end_header_id|>
+ {{.Input}}
+ <|eot_id|><|start_header_id|>assistant<|end_header_id|>
+ context_size: 8192
+ f16: true
+ stopwords:
+ - <|im_end|>
+ - <dummy32000>
+ - "<|eot_id|>"
+ - <|end_of_text|>
From 7f62b418a4c605257183c3bbf1f7b98f0904fe5f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 29 Jan 2025 15:16:07 +0100
Subject: [PATCH 31/85] chore(docs): add documentation for l4t images

Signed-off-by: Ettore Di Giacinto
---
 .../docs/getting-started/container-images.md | 14 +++++++++++++-
 docs/content/docs/reference/nvidia-l4t.md | 10 ++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 64f6dbc9..a6a955ad 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -154,7 +154,7 @@ Images are available with and without python dependencies. Note that images with
 Images with `core` in the tag are smaller and do not contain any python dependencies.
-{{< tabs tabTotal="7" >}}
+{{< tabs tabTotal="8" >}}
 {{% tab tabName="Vanilla / CPU Images" %}}
 | Description | Quay | Docker Hub |
@@ -236,6 +236,18 @@ Images with `core` in the tag are smaller and do not contain any python dependen
 | Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan-fmpeg-core` | `localai/localai:{{< version >}}-vulkan-fmpeg-core` |
 {{% /tab %}}
+{{% tab tabName="Nvidia Linux for tegra" %}}
+
+These images are compatible with Nvidia ARM64 devices, such as the Jetson Nano, Jetson Xavier NX, and Jetson AGX Xavier. For more information, see the [Nvidia L4T guide]({{%relref "docs/reference/nvidia-l4t" %}}).
+
+| Description | Quay | Docker Hub |
+| --- | --- |-------------------------------------------------------------|
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core` | `localai/localai:master-nvidia-l4t-arm64-core` |
+| Latest tag | `quay.io/go-skynet/local-ai:latest-nvidia-l4t-arm64-core` | `localai/localai:latest-nvidia-l4t-arm64-core` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-nvidia-l4t-arm64-core` | `localai/localai:{{< version >}}-nvidia-l4t-arm64-core` |
+
+{{% /tab %}}
+
 {{< /tabs >}}
 ## See Also
diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/docs/reference/nvidia-l4t.md
index 028ee531..ce0fd5e9 100644
--- a/docs/content/docs/reference/nvidia-l4t.md
+++ b/docs/content/docs/reference/nvidia-l4t.md
@@ -21,7 +21,13 @@
 git clone https://github.com/mudler/LocalAI
 cd LocalAI
-docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t localai-orin .
+docker build --build-arg SKIP_DRIVERS=true --build-arg BUILD_TYPE=cublas --build-arg BASE_IMAGE=nvcr.io/nvidia/l4t-jetpack:r36.4.0 --build-arg IMAGE_TYPE=core -t quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core .
+```
+
+Alternatively, prebuilt images are available on quay.io and Docker Hub:
+
+```bash
+docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 ```

 ## Usage
@@ -29,7 +35,7 @@
 Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:

 ```bash
-docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all localai-orin
+docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 ```

 Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.
From 1656e1a88e3f5fad247a47d4d9e5c54f2d606550 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Wed, 29 Jan 2025 22:45:38 +0100
Subject: [PATCH 32/85] chore: :arrow_up: Update ggerganov/llama.cpp to `eb7cf15a808d4d7a71eef89cc6a9b96fe82989dc` (#4717)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6cbc7326..20ef7199 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=cae9fb4361138b937464524eed907328731b81f6
+CPPLLAMA_VERSION?=eb7cf15a808d4d7a71eef89cc6a9b96fe82989dc
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
From 72e52c4f6a9fb29bfa2d85006245fc3e05ae8082 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 30 Jan 2025 00:03:01 +0100
Subject: [PATCH 33/85] chore: drop embedded models (#4715)

Since the remote gallery was introduced, this is now completely superseded by it. In order to keep the code clean and remove redundant parts, let's simplify the usage.
Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- core/application/startup.go | 2 +- core/cli/models.go | 2 +- core/cli/run.go | 2 - core/config/application_config.go | 8 -- core/services/gallery.go | 2 +- .../content/docs/advanced/run-other-models.md | 126 ------------------ .../docs/getting-started/container-images.md | 2 +- embedded/embedded.go | 72 ---------- embedded/model_library.yaml | 9 -- embedded/models/all-minilm-l6-v2.yaml | 13 -- embedded/models/animagine-xl.yaml | 17 --- embedded/models/bakllava.yaml | 40 ------ embedded/models/bark.yaml | 8 -- embedded/models/cerbero.yaml | 24 ---- embedded/models/codellama-7b-gguf.yaml | 20 --- embedded/models/codellama-7b.yaml | 14 -- embedded/models/coqui.yaml | 9 -- embedded/models/dolphin-2.5-mixtral-8x7b.yaml | 31 ----- embedded/models/hermes-2-pro-mistral.yaml | 59 -------- embedded/models/llama3-instruct.yaml | 48 ------- embedded/models/llava-1.5.yaml | 33 ----- embedded/models/llava-1.6-mistral.yaml | 33 ----- embedded/models/llava-1.6-vicuna.yaml | 37 ----- embedded/models/llava.yaml | 40 ------ embedded/models/mamba-bagel.yaml | 21 --- embedded/models/mamba-chat.yaml | 28 ---- embedded/models/mistral-openorca.yaml | 32 ----- embedded/models/mixtral-instruct.yaml | 25 ---- embedded/models/phi-2-chat.yaml | 25 ---- embedded/models/phi-2-orange.yaml | 30 ----- embedded/models/rhasspy-voice-en-us-amy.yaml | 13 -- embedded/models/tinyllama-chat.yaml | 29 ---- embedded/models/transformers-tinyllama.yaml | 31 ----- embedded/models/vall-e-x.yaml | 8 -- embedded/models/whisper-base.yaml | 18 --- pkg/startup/model_preload.go | 28 +--- pkg/startup/model_preload_test.go | 53 +------- .../webui_static.yaml => webui_static.yaml | 0 39 files changed, 8 insertions(+), 986 deletions(-) delete mode 100644 docs/content/docs/advanced/run-other-models.md delete mode 100644 embedded/embedded.go delete mode 100644 embedded/model_library.yaml delete mode 100644 embedded/models/all-minilm-l6-v2.yaml delete mode 100644 embedded/models/animagine-xl.yaml delete mode 100644 embedded/models/bakllava.yaml delete mode 100644 embedded/models/bark.yaml delete mode 100644 embedded/models/cerbero.yaml delete mode 100644 embedded/models/codellama-7b-gguf.yaml delete mode 100644 embedded/models/codellama-7b.yaml delete mode 100644 embedded/models/coqui.yaml delete mode 100644 embedded/models/dolphin-2.5-mixtral-8x7b.yaml delete mode 100644 embedded/models/hermes-2-pro-mistral.yaml delete mode 100644 embedded/models/llama3-instruct.yaml delete mode 100644 embedded/models/llava-1.5.yaml delete mode 100644 embedded/models/llava-1.6-mistral.yaml delete mode 100644 embedded/models/llava-1.6-vicuna.yaml delete mode 100644 embedded/models/llava.yaml delete mode 100644 embedded/models/mamba-bagel.yaml delete mode 100644 embedded/models/mamba-chat.yaml delete mode 100644 embedded/models/mistral-openorca.yaml delete mode 100644 embedded/models/mixtral-instruct.yaml delete mode 100644 embedded/models/phi-2-chat.yaml delete mode 100644 embedded/models/phi-2-orange.yaml delete mode 100644 embedded/models/rhasspy-voice-en-us-amy.yaml delete mode 100644 embedded/models/tinyllama-chat.yaml delete mode 100644 embedded/models/transformers-tinyllama.yaml delete mode 100644 embedded/models/vall-e-x.yaml delete mode 100644 embedded/models/whisper-base.yaml rename embedded/webui_static.yaml => webui_static.yaml (100%) diff --git a/Makefile b/Makefile index 20ef7199..5b903d7d 100644 --- a/Makefile +++ b/Makefile @@ -861,7 +861,7 @@ swagger: .PHONY: gen-assets gen-assets: - $(GOCMD) run 
core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets + $(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets ## Documentation docs/layouts/_default: diff --git a/core/application/startup.go b/core/application/startup.go index cd52d37a..fffcd8bb 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) { } } - if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { + if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { log.Error().Err(err).Msg("error installing models") } diff --git a/core/cli/models.go b/core/cli/models.go index 56d13fc7..28b2944f 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model") } - err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) + err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName) if err != nil { return err } diff --git a/core/cli/run.go b/core/cli/run.go index 279ff94b..3162ef14 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -32,7 +32,6 @@ type RunCMD struct { Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"` AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"` - RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"` PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"` Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"` PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. 
Path to a YAML config file" group:"models"` @@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval), config.WithF16(r.F16), config.WithStringGalleries(r.Galleries), - config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), config.WithCsrf(r.CSRF), diff --git a/core/config/application_config.go b/core/config/application_config.go index 1ffcb297..2cc9b01b 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -44,8 +44,6 @@ type ApplicationConfig struct { DisableGalleryEndpoint bool LoadToMemory []string - ModelLibraryURL string - Galleries []Gallery BackendAssets embed.FS @@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption { } } -func WithModelLibraryURL(url string) AppOption { - return func(o *ApplicationConfig) { - o.ModelLibraryURL = url - } -} - func WithLibPath(path string) AppOption { return func(o *ApplicationConfig) { o.LibPath = path diff --git a/core/services/gallery.go b/core/services/gallery.go index 45bebd4f..f499d381 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -129,7 +129,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader if op.GalleryModelName != "" { err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans) } else if op.ConfigURL != "" { - err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) + err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL) if err != nil { updateError(err) continue diff --git a/docs/content/docs/advanced/run-other-models.md b/docs/content/docs/advanced/run-other-models.md deleted file mode 100644 index f9bdc22d..00000000 --- a/docs/content/docs/advanced/run-other-models.md +++ /dev/null @@ -1,126 +0,0 @@ -+++ -disableToc = false -title = "Run other Models" -weight = 23 -icon = "rocket_launch" - -+++ - -## Running other models - -> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_. - -To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model. - -To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs. - -There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture. - -{{% alert icon="💡" %}} - -To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). 
For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models). -{{% /alert %}} - -{{< tabs tabTotal="3" >}} -{{% tab tabName="CPU-only" %}} - -> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies - -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original 
model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` | -| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{% tab tabName="GPU (CUDA 11)" %}} - - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). - -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version 
>}}-cublas-cuda11 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` | -| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` | -{{% /tab %}} - - -{{% tab tabName="GPU (CUDA 12)" %}} - -> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}). 
- -| Model | Category | Docker command | -| --- | --- | --- | -| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` | -| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` | -| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` | -| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` | -| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` | -| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` | -| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` | -| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` | -| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` | -| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` | -| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` | -| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` | -| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` | -| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` | -| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` | -| 
[dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` | -| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` | -| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` | -| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` | -| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` | -| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` | -| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` | -{{% /tab %}} - -{{< /tabs >}} - -{{% alert icon="💡" %}} -**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured: - -```bash -docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2 -``` - -{{% /alert %}} diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md index a6a955ad..d1930805 100644 --- a/docs/content/docs/getting-started/container-images.md +++ b/docs/content/docs/getting-started/container-images.md @@ -143,7 +143,7 @@ The AIO Images are inheriting the same environment variables as the base images | Variable | Default | Description | | ---------------------| ------- | ----------- | | `PROFILE` | Auto-detected | The size of the model to use. 
Available: `cpu`, `gpu-8g` | -| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/advanced/run-other-models" %}})) | +| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) | ## Standard container images diff --git a/embedded/embedded.go b/embedded/embedded.go deleted file mode 100644 index 3a4ea262..00000000 --- a/embedded/embedded.go +++ /dev/null @@ -1,72 +0,0 @@ -package embedded - -import ( - "embed" - "fmt" - "slices" - "strings" - - "github.com/mudler/LocalAI/pkg/downloader" - "github.com/rs/zerolog/log" - - "github.com/mudler/LocalAI/pkg/assets" - "gopkg.in/yaml.v3" -) - -var modelShorteners map[string]string - -//go:embed model_library.yaml -var modelLibrary []byte - -//go:embed models/* -var embeddedModels embed.FS - -func ModelShortURL(s string) string { - if _, ok := modelShorteners[s]; ok { - s = modelShorteners[s] - } - - return s -} - -func init() { - err := yaml.Unmarshal(modelLibrary, &modelShorteners) - if err != nil { - log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary") - } -} - -func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { - remoteLibrary := map[string]string{} - uri := downloader.URI(url) - err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error { - return yaml.Unmarshal(i, &remoteLibrary) - }) - if err != nil { - return nil, fmt.Errorf("error downloading remote library: %s", err.Error()) - } - - return remoteLibrary, err -} - -// ExistsInModelsLibrary checks if a model exists in the embedded models library -func ExistsInModelsLibrary(s string) bool { - f := fmt.Sprintf("%s.yaml", s) - - a := []string{} - - for _, j := range assets.ListFiles(embeddedModels) { - a = append(a, strings.TrimPrefix(j, "models/")) - } - - return slices.Contains(a, f) -} - -// ResolveContent returns the content in the embedded model library -func ResolveContent(s string) ([]byte, error) { - if ExistsInModelsLibrary(s) { - return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s)) - } - - return nil, fmt.Errorf("cannot find model %s", s) -} diff --git a/embedded/model_library.yaml b/embedded/model_library.yaml deleted file mode 100644 index 281941a5..00000000 --- a/embedded/model_library.yaml +++ /dev/null @@ -1,9 +0,0 @@ -### -### -### This file contains the list of models that are available in the library -### The URLs are automatically expanded when local-ai is being called with the key as argument -### -### For models with an entire YAML file to be embededd, put the file inside the `models` -### directory, it will be automatically available with the file name as key (without the .yaml extension) - -phi-2: "github://mudler/LocalAI-examples/configurations/phi-2.yaml@main" diff --git a/embedded/models/all-minilm-l6-v2.yaml b/embedded/models/all-minilm-l6-v2.yaml deleted file mode 100644 index 512d63a4..00000000 --- a/embedded/models/all-minilm-l6-v2.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: all-minilm-l6-v2 -backend: sentencetransformers -embeddings: true -parameters: - model: all-MiniLM-L6-v2 - -usage: | - You can test this model with curl like this: - - curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ - "input": "Your text string goes here", - "model": "all-minilm-l6-v2" - }' \ No newline at end of file diff --git a/embedded/models/animagine-xl.yaml b/embedded/models/animagine-xl.yaml deleted file 
mode 100644 index d492c080..00000000 --- a/embedded/models/animagine-xl.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: animagine-xl -parameters: - model: Linaqruf/animagine-xl -backend: diffusers -f16: true -diffusers: - scheduler_type: euler_a - -usage: | - curl http://localhost:8080/v1/images/generations \ - -H "Content-Type: application/json" \ - -d '{ - "prompt": "|", - "model": "animagine-xl", - "step": 51, - "size": "1024x1024" - }' \ No newline at end of file diff --git a/embedded/models/bakllava.yaml b/embedded/models/bakllava.yaml deleted file mode 100644 index 52fd9466..00000000 --- a/embedded/models/bakllava.yaml +++ /dev/null @@ -1,40 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: bakllava - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: bakllava-mmproj.gguf -parameters: - model: bakllava.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. - {{.Input}} - ASSISTANT: - -download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "bakllava", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/bark.yaml b/embedded/models/bark.yaml deleted file mode 100644 index da1b1db4..00000000 --- a/embedded/models/bark.yaml +++ /dev/null @@ -1,8 +0,0 @@ -usage: | - bark works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "bark", - "input":"Hello, this is a test!" 
- }' | aplay -# TODO: This is a placeholder until we manage to pre-load HF/Transformers models \ No newline at end of file diff --git a/embedded/models/cerbero.yaml b/embedded/models/cerbero.yaml deleted file mode 100644 index 8ace4e35..00000000 --- a/embedded/models/cerbero.yaml +++ /dev/null @@ -1,24 +0,0 @@ -backend: llama -context_size: 8192 -f16: false -gpu_layers: 90 -name: cerbero -mmap: false -parameters: - model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf - top_k: 80 - temperature: 0.2 - top_p: 0.7 -template: - completion: "{{.Input}}" - chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " -roles: - user: "[|Umano|] " - system: "[|Umano|] " - assistant: "[|Assistente|] " - -stopwords: -- "[|Umano|]" - -trimsuffix: -- "\n" \ No newline at end of file diff --git a/embedded/models/codellama-7b-gguf.yaml b/embedded/models/codellama-7b-gguf.yaml deleted file mode 100644 index 413c838b..00000000 --- a/embedded/models/codellama-7b-gguf.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: codellama-7b-gguf -backend: transformers -parameters: - model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf - temperature: 0.5 - top_k: 40 - seed: -1 - top_p: 0.95 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -context_size: 4096 -f16: true -gpu_layers: 90 -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "codellama-7b-gguf", - "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" - }' \ No newline at end of file diff --git a/embedded/models/codellama-7b.yaml b/embedded/models/codellama-7b.yaml deleted file mode 100644 index d9b5c62c..00000000 --- a/embedded/models/codellama-7b.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: codellama-7b -backend: transformers -type: AutoModelForCausalLM -parameters: - model: codellama/CodeLlama-7b-hf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "codellama-7b", - "prompt": "import socket\n\ndef ping_exponential_backoff(host: str):" - }' diff --git a/embedded/models/coqui.yaml b/embedded/models/coqui.yaml deleted file mode 100644 index 5d67f241..00000000 --- a/embedded/models/coqui.yaml +++ /dev/null @@ -1,9 +0,0 @@ -usage: | - coqui works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "coqui", - "model": "tts_models/en/ljspeech/glow-tts", - "input":"Hello, this is a test!" 
- }'
-# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
\ No newline at end of file
diff --git a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml b/embedded/models/dolphin-2.5-mixtral-8x7b.yaml
deleted file mode 100644
index 12ee1efc..00000000
--- a/embedded/models/dolphin-2.5-mixtral-8x7b.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: dolphin-mixtral-8x7b
-mmap: true
-parameters:
- model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
- temperature: 0.5
- top_k: 40
- top_p: 0.95
- seed: -1
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
-template:
- chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
- {{if .Content}}{{.Content}}{{end}}<|im_end|>
- chat: |
- {{.Input}}
- <|im_start|>assistant
- completion: |
- {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-gpu_layers: 90
-
-usage: |
- curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "dolphin-mixtral-8x7b",
- "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
- }' \ No newline at end of file
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
deleted file mode 100644
index 74d98eeb..00000000
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: hermes-2-pro-mistral
-mmap: true
-parameters:
- model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
-
-template:
- chat_message: |
- <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
- {{- if .FunctionCall }}
- <tool_call>
- {{- else if eq .RoleName "tool" }}
- <tool_response>
- {{- end }}
- {{- if .Content}}
- {{.Content }}
- {{- end }}
- {{- if .FunctionCall}}
- {{toJson .FunctionCall}}
- {{- end }}
- {{- if .FunctionCall }}
- </tool_call>
- {{- else if eq .RoleName "tool" }}
- </tool_response>
- {{- end }}<|im_end|>
- # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
- function: |
- <|im_start|>system
- You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.
Here are the available tools:
- <tools>
- {{range .Functions}}
- {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
- {{end}}
- </tools>
- Use the following pydantic model json schema for each tool call you will make:
- {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
- For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
- <tool_call>
- {'arguments': <args-dict>, 'name': <function-name>}
- </tool_call><|im_end|>
- {{.Input -}}
- <|im_start|>assistant
-
- chat: |
- {{.Input -}}
- <|im_start|>assistant
- completion: |
- {{.Input}}
-context_size: 4096
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "\n</tool_call>"
-- "\n\n\n"
-usage: |
- curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "hermes-2-pro-mistral",
- "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
- }'
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
deleted file mode 100644
index d483d2b2..00000000
--- a/embedded/models/llama3-instruct.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: llama3-8b-instruct
-mmap: true
-parameters:
- model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
-
-template:
- chat_message: |
- <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
-
- {{ if .FunctionCall -}}
- Function call:
- {{ else if eq .RoleName "tool" -}}
- Function response:
- {{ end -}}
- {{ if .Content -}}
- {{.Content -}}
- {{ else if .FunctionCall -}}
- {{ toJson .FunctionCall -}}
- {{ end -}}
- <|eot_id|>
- function: |
- <|start_header_id|>system<|end_header_id|>
-
- You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.
Here are the available tools:
- <tools>
- {{range .Functions}}
- {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
- {{end}}
- </tools>
- Use the following pydantic model json schema for each tool call you will make:
- {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
- Function call:
- chat: |
- <|begin_of_text|>{{.Input }}
- <|start_header_id|>assistant<|end_header_id|>
- completion: |
- {{.Input}}
-context_size: 8192
-f16: true
-stopwords:
-- <|im_end|>
-- <dummy32000>
-- "<|eot_id|>"
-usage: |
- curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "llama3-8b-instruct",
- "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
- }'
diff --git a/embedded/models/llava-1.5.yaml b/embedded/models/llava-1.5.yaml
deleted file mode 100644
index 3db48524..00000000
--- a/embedded/models/llava-1.5.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.5
-
-roles:
- user: "USER:"
- assistant: "ASSISTANT:"
- system: "SYSTEM:"
-
-mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
-parameters:
- model: llava-v1.5-7b-Q4_K.gguf
-
-template:
- chat: |
- A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
- {{.Input}}
- ASSISTANT:
-
-download_files:
-- filename: llava-v1.5-7b-Q4_K.gguf
- uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
-- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
- uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
-
-usage: |
- curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
- "model": "llava-1.5",
- "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
diff --git a/embedded/models/llava-1.6-mistral.yaml b/embedded/models/llava-1.6-mistral.yaml
deleted file mode 100644
index 602ceb62..00000000
--- a/embedded/models/llava-1.6-mistral.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-backend: llama-cpp
-context_size: 4096
-f16: true
-
-gpu_layers: 90
-mmap: true
-name: llava-1.6-mistral
-
-roles:
- user: "USER:"
- assistant: "ASSISTANT:"
- system: "SYSTEM:"
-
-mmproj: llava-v1.6-7b-mmproj-f16.gguf
-parameters:
- model: llava-v1.6-mistral-7b.gguf
-
-template:
- chat: |
- A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
- {{.Input}} - ASSISTANT: - -download_files: -- filename: llava-v1.6-mistral-7b.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf -- filename: llava-v1.6-7b-mmproj-f16.gguf - uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava-1.6-mistral", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava-1.6-vicuna.yaml b/embedded/models/llava-1.6-vicuna.yaml deleted file mode 100644 index cea33e7f..00000000 --- a/embedded/models/llava-1.6-vicuna.yaml +++ /dev/null @@ -1,37 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava-1.6-vicuna - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: mmproj-vicuna7b-f16.gguf -parameters: - model: vicuna-7b-q5_k.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. - {{.Input}} - ASSISTANT: - -download_files: -- filename: vicuna-7b-q5_k.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf -- filename: mmproj-vicuna7b-f16.gguf - uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava-1.6-vicuna", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/llava.yaml b/embedded/models/llava.yaml deleted file mode 100644 index 3d240681..00000000 --- a/embedded/models/llava.yaml +++ /dev/null @@ -1,40 +0,0 @@ -backend: llama-cpp -context_size: 4096 -f16: true - -gpu_layers: 90 -mmap: true -name: llava - -roles: - user: "USER:" - assistant: "ASSISTANT:" - system: "SYSTEM:" - -mmproj: bakllava-mmproj.gguf -parameters: - model: bakllava.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: | - A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. 
- {{.Input}} - ASSISTANT: - -download_files: -- filename: bakllava.gguf - uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf -- filename: bakllava-mmproj.gguf - uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "llava", - "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}' diff --git a/embedded/models/mamba-bagel.yaml b/embedded/models/mamba-bagel.yaml deleted file mode 100644 index aed7142f..00000000 --- a/embedded/models/mamba-bagel.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: bagel -backend: mamba -parameters: - model: "jondurbin/bagel-dpo-2.8b-v0.2" -systemPrompt: "You are a helpful, unbiased, uncensored assistant." -template: - chat_message: | - {{if eq .RoleName "assistant"}}{{.Content}}{{else}} - [INST] - {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<>{{.Content}}<> - - {{else if .Content}}{{.Content}}{{end}} - [/INST] - {{end}} - completion: | - {{.Input}} -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "bagel", - "messages": [{"role": "user", "content": "how are you doing"}], - }' diff --git a/embedded/models/mamba-chat.yaml b/embedded/models/mamba-chat.yaml deleted file mode 100644 index b0d7fc62..00000000 --- a/embedded/models/mamba-chat.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: mamba-chat -backend: mamba -parameters: - model: "havenhq/mamba-chat" - -trimsuffix: -- <|endoftext|> - -# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta/blob/main/tokenizer_config.json -# "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", -template: - chat_message: | - {{if eq .RoleName "assistant"}}<|assistant|>{{else if eq .RoleName "system"}}<|system|>{{else if eq .RoleName "user"}}<|user|>{{end}} - {{if .Content}}{{.Content}}{{end}} - - - chat: | - {{.Input}} - <|assistant|> - - completion: | - {{.Input}} -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "mamba-chat", - "messages": [{"role": "user", "content": "how are you doing"}], - "temperature": 0.7 - }' \ No newline at end of file diff --git a/embedded/models/mistral-openorca.yaml b/embedded/models/mistral-openorca.yaml deleted file mode 100644 index 0794a69b..00000000 --- a/embedded/models/mistral-openorca.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: mistral-openorca -mmap: true -parameters: - model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf - temperature: 0.2 - top_k: 40 - top_p: 0.95 - seed: -1 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - <|im_end|> - chat: | - 
{{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "mistral-openorca", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/mixtral-instruct.yaml b/embedded/models/mixtral-instruct.yaml deleted file mode 100644 index 246b2324..00000000 --- a/embedded/models/mixtral-instruct.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: mixtral-instruct -mmap: true -parameters: - model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf - temperature: 0.2 - top_k: 40 - seed: -1 - top_p: 0.95 -mirostat: 2 -mirostat_eta: 1.0 -mirostat_tau: 1.0 - -template: - chat: &chat | - [INST] {{.Input}} [/INST] - completion: *chat -context_size: 4096 -f16: true -gpu_layers: 90 - -usage: | - curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{ - "model": "mixtral-instruct", - "prompt": "How are you doing?" - }' \ No newline at end of file diff --git a/embedded/models/phi-2-chat.yaml b/embedded/models/phi-2-chat.yaml deleted file mode 100644 index 4a3ca7aa..00000000 --- a/embedded/models/phi-2-chat.yaml +++ /dev/null @@ -1,25 +0,0 @@ -name: phi-2-chat -mmap: true -parameters: - model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - <|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml deleted file mode 100644 index 838909c9..00000000 --- a/embedded/models/phi-2-orange.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: phi-2-orange -mmap: true -parameters: - model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}} - <|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -- - -description: | - This model is a chatbot that can be used for general conversation. 
- [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF) - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-orange", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' diff --git a/embedded/models/rhasspy-voice-en-us-amy.yaml b/embedded/models/rhasspy-voice-en-us-amy.yaml deleted file mode 100644 index 911293ca..00000000 --- a/embedded/models/rhasspy-voice-en-us-amy.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: voice-en-us-amy-low -download_files: - - filename: voice-en-us-amy-low.tar.gz - uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz - - -usage: | - To test if this model works as expected, you can use the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "model":"en-us-amy-low.onnx", - "input": "Hi, this is a test." - }' \ No newline at end of file diff --git a/embedded/models/tinyllama-chat.yaml b/embedded/models/tinyllama-chat.yaml deleted file mode 100644 index 48c44f9f..00000000 --- a/embedded/models/tinyllama-chat.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: tinyllama-chat -mmap: true -parameters: - model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf - temperature: 0.2 - top_k: 40 - seed: -1 - top_p: 0.95 -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - - completion: | - {{.Input}} -context_size: 4096 -f16: true -stopwords: -- <|im_end|> -gpu_layers: 90 - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "tinyllama-chat", - "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] - }' \ No newline at end of file diff --git a/embedded/models/transformers-tinyllama.yaml b/embedded/models/transformers-tinyllama.yaml deleted file mode 100644 index ee6e7889..00000000 --- a/embedded/models/transformers-tinyllama.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: tinyllama-chat -backend: transformers -type: AutoModelForCausalLM - -parameters: - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 - temperature: 0.2 - top_k: 40 - top_p: 0.95 - max_tokens: 4096 - -template: - chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} - {{if .Content}}{{.Content}}{{end}}<|im_end|> - chat: | - {{.Input}} - <|im_start|>assistant - - completion: | - {{.Input}} - -stopwords: -- <|im_end|> - -usage: | - curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "tinyllama-chat", - "messages": [{"role": "user", "content": "Say this is a test!"}], - "temperature": 0.7 - }' diff --git a/embedded/models/vall-e-x.yaml b/embedded/models/vall-e-x.yaml deleted file mode 100644 index b97015f6..00000000 --- a/embedded/models/vall-e-x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -usage: | - Vall-e-x works without any configuration, to test it, you can run the following curl command: - - curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ - "backend": "vall-e-x", - "input":"Hello, this is a test!" 
- }' | aplay -# TODO: This is a placeholder until we manage to pre-load HF/Transformers models \ No newline at end of file diff --git a/embedded/models/whisper-base.yaml b/embedded/models/whisper-base.yaml deleted file mode 100644 index f7ebd217..00000000 --- a/embedded/models/whisper-base.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: whisper -backend: whisper -parameters: - model: ggml-whisper-base.bin - -usage: | - ## example audio file - wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg - - ## Send the example audio file to the transcriptions endpoint - curl http://localhost:8080/v1/audio/transcriptions \ - -H "Content-Type: multipart/form-data" \ - -F file="@$PWD/gb1.ogg" -F model="whisper" - -download_files: -- filename: "ggml-whisper-base.bin" - sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" \ No newline at end of file diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index a445b10e..0f598df5 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -9,7 +9,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/embedded" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -18,42 +17,17 @@ import ( // InstallModels will preload models from the given list of URLs and galleries // It will download the model if it is not already present in the model path // It will also try to resolve if the model is an embedded model YAML configuration -func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error { +func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error { // create an error that groups all errors var err error - lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) - for _, url := range models { // As a best effort, try to resolve the model from the remote library // if it's not resolved we try with the other method below - if modelLibraryURL != "" { - if lib[url] != "" { - log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } - } - url = embedded.ModelShortURL(url) uri := downloader.URI(url) switch { - case embedded.ExistsInModelsLibrary(url): - modelYAML, e := embedded.ResolveContent(url) - // If we resolve something, just save it to disk and continue - if e != nil { - log.Error().Err(e).Msg("error resolving model content") - err = errors.Join(err, e) - continue - } - - log.Debug().Msgf("[startup] resolved embedded model: %s", url) - md5Name := utils.MD5(url) - modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml" - if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); err != nil { - log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") - err = errors.Join(err, e) - } case uri.LooksLikeOCI(): log.Debug().Msgf("[startup] resolved OCI model to download: %s", url) diff --git a/pkg/startup/model_preload_test.go b/pkg/startup/model_preload_test.go index 78cf7311..51e6d702 100644 --- a/pkg/startup/model_preload_test.go +++ b/pkg/startup/model_preload_test.go @@ -7,7 
+7,6 @@ import ( "github.com/mudler/LocalAI/core/config" . "github.com/mudler/LocalAI/pkg/startup" - "github.com/mudler/LocalAI/pkg/utils" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -16,29 +15,13 @@ import ( var _ = Describe("Preload test", func() { Context("Preloading from strings", func() { - It("loads from remote url", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml" - fileName := fmt.Sprintf("%s.yaml", "phi-2") - - InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2") - - resultFile := filepath.Join(tmpdir, fileName) - - content, err := os.ReadFile(resultFile) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: phi-2")) - }) - It("loads from embedded full-urls", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml" fileName := fmt.Sprintf("%s.yaml", "phi-2") - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) + InstallModels([]config.Gallery{}, tmpdir, true, nil, url) resultFile := filepath.Join(tmpdir, fileName) @@ -47,45 +30,13 @@ var _ = Describe("Preload test", func() { Expect(string(content)).To(ContainSubstring("name: phi-2")) }) - It("loads from embedded short-urls", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - url := "phi-2" - - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) - - entry, err := os.ReadDir(tmpdir) - Expect(err).ToNot(HaveOccurred()) - Expect(entry).To(HaveLen(1)) - resultFile := entry[0].Name() - - content, err := os.ReadFile(filepath.Join(tmpdir, resultFile)) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: phi-2")) - }) - It("loads from embedded models", func() { - tmpdir, err := os.MkdirTemp("", "") - Expect(err).ToNot(HaveOccurred()) - url := "mistral-openorca" - fileName := fmt.Sprintf("%s.yaml", utils.MD5(url)) - - InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url) - - resultFile := filepath.Join(tmpdir, fileName) - - content, err := os.ReadFile(resultFile) - Expect(err).ToNot(HaveOccurred()) - - Expect(string(content)).To(ContainSubstring("name: mistral-openorca")) - }) It("downloads from urls", func() { tmpdir, err := os.MkdirTemp("", "") Expect(err).ToNot(HaveOccurred()) url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K") - err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url) + err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url) Expect(err).ToNot(HaveOccurred()) resultFile := filepath.Join(tmpdir, fileName) diff --git a/embedded/webui_static.yaml b/webui_static.yaml similarity index 100% rename from embedded/webui_static.yaml rename to webui_static.yaml From f1d6d65417e1bccd4d93990ccfd36cc1a0602605 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 Jan 2025 16:38:35 +0100 Subject: [PATCH 34/85] chore(model gallery): add virtuoso-lite (#4718) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1716f2b1..990059c9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -198,6 +198,20 @@ - filename: NightWing3-10B-v0.1-Q4_K_M.gguf sha256: 
2e87671542d22fe1ef9a68e43f2fdab7c2759479ad531946d9f0bdeffa6f5747 uri: huggingface://bartowski/NightWing3-10B-v0.1-GGUF/NightWing3-10B-v0.1-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "virtuoso-lite" + urls: + - https://huggingface.co/arcee-ai/Virtuoso-Lite + - https://huggingface.co/bartowski/Virtuoso-Lite-GGUF + description: | + Virtuoso-Lite (10B) is our next-generation, 10-billion-parameter language model based on the Llama-3 architecture. It is distilled from Deepseek-v3 using ~1.1B tokens/logits, allowing it to achieve robust performance at a significantly reduced parameter count compared to larger models. Despite its compact size, Virtuoso-Lite excels in a variety of tasks, demonstrating advanced reasoning, code generation, and mathematical problem-solving capabilities. + overrides: + parameters: + model: Virtuoso-Lite-Q4_K_M.gguf + files: + - filename: Virtuoso-Lite-Q4_K_M.gguf + sha256: 1d21bef8467a11a1e473d397128b05fb87b7e824606cdaea061e550cb219fee2 + uri: huggingface://bartowski/Virtuoso-Lite-GGUF/Virtuoso-Lite-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 244f4b564f71e4dca0be997e2002cfab5ffd38a9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 Jan 2025 16:42:48 +0100 Subject: [PATCH 35/85] chore(model gallery): add selene-1-mini-llama-3.1-8b (#4719) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 990059c9..7a7b0418 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5359,6 +5359,29 @@ - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "selene-1-mini-llama-3.1-8b" + icon: https://atla-ai.notion.site/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Ff08e6e70-73af-4363-9621-90e906b92ebc%2F1bfb4316-1ce6-40a0-800c-253739cfcdeb%2Fatla_white3x.svg?table=block&id=17c309d1-7745-80f9-8f60-e755409acd8d&spaceId=f08e6e70-73af-4363-9621-90e906b92ebc&userId=&cache=v2 + urls: + - https://huggingface.co/AtlaAI/Selene-1-Mini-Llama-3.1-8B + - https://huggingface.co/bartowski/Selene-1-Mini-Llama-3.1-8B-GGUF + description: | + Atla Selene Mini is a state-of-the-art small language model-as-a-judge (SLMJ). Selene Mini achieves comparable performance to models 10x its size, outperforming GPT-4o on RewardBench, EvalBiasBench, and AutoJ. + + Post-trained from Llama-3.1-8B across a wide range of evaluation tasks and scoring criteria, Selene Mini outperforms prior small models overall across 11 benchmarks covering three different types of tasks: + + Absolute scoring, e.g. "Evaluate the harmlessness of this response on a scale of 1-5" + Classification, e.g. "Does this response address the user query? Answer Yes or No." + Pairwise preference. e.g. "Which of the following responses is more logically consistent - A or B?" + + It is also the #1 8B generative model on RewardBench. 
+ overrides: + parameters: + model: Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf + files: + - filename: Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf + sha256: 908e6ce19f7cd3d7394bd7c38e43de2f228aca6aceda35c7ee70d069ad60493e + uri: huggingface://bartowski/Selene-1-Mini-Llama-3.1-8B-GGUF/Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" From 60ec2cf7513f2d40b9c1836cdf2e06b14a38fd1a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 Jan 2025 16:44:44 +0100 Subject: [PATCH 36/85] chore(model gallery): add openthinker-7b (#4720) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7a7b0418..6b391356 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3455,6 +3455,25 @@ - filename: Confucius-o1-14B-Q4_K_M.gguf sha256: 03182920edd8667db7d2a362ca2d25e88f4b615b383b5a55c764f4715fb22dd9 uri: huggingface://bartowski/Confucius-o1-14B-GGUF/Confucius-o1-14B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "openthinker-7b" + icon: https://huggingface.co/datasets/open-thoughts/open-thoughts-114k/resolve/main/open_thoughts.png + urls: + - https://huggingface.co/open-thoughts/OpenThinker-7B + - https://huggingface.co/bartowski/OpenThinker-7B-GGUF + description: | + This model is a fine-tuned version of Qwen/Qwen2.5-7B-Instruct on the OpenThoughts-114k dataset dataset. + + The dataset is derived by distilling DeepSeek-R1 using the data pipeline available on github. More info about the dataset can be found on the dataset card at OpenThoughts-114k dataset. + + This model improves upon the Bespoke-Stratos-7B model, which used 17k examples (Bespoke-Stratos-17k dataset). The numbers reported in the table below are evaluated with our open-source tool Evalchemy. 
+ overrides: + parameters: + model: OpenThinker-7B-Q4_K_M.gguf + files: + - filename: OpenThinker-7B-Q4_K_M.gguf + sha256: 94dff1a7acd685db5cff7afdb837aab8172e06d65fe6179ba47428e3030acd93 + uri: huggingface://bartowski/OpenThinker-7B-GGUF/OpenThinker-7B-Q4_K_M.gguf - &llama31 ## LLama3.1 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 From cd5489ce47452c523be196c291e6f0a5b5922424 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 Jan 2025 08:51:32 +0100 Subject: [PATCH 37/85] chore(model-gallery): :arrow_up: update checksum (#4723) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 150 +++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 81 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6b391356..98c3a782 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -852,8 +852,8 @@ - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf -- &llama32 ## llama3.2 - url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master" +- &llama32 + url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master" ## llama3.2 icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 description: | @@ -1375,11 +1375,7 @@ urls: - https://huggingface.co/HuggingFaceTB/FineMath-Llama-3B - https://huggingface.co/bartowski/FineMath-Llama-3B-GGUF - description: | - This is a continual-pre-training of Llama-3.2-3B on a mix of 📐 FineMath (our new high quality math dataset) and FineWeb-Edu. - - The model demonstrates superior math performance compared to Llama 3.2 3B, while maintaining similar performance on knowledge, reasoning, and common sense benchmarks. - It was trained on 160B tokens using a mix of 40% FineWeb-Edu and 60% from FineMath (30% FineMath-4+ subset and 30% InfiWebMath-4+ subset). We use nanotron for the training, and you can find the training scripts in our SmolLM2 GitHub repo. + description: "This is a continual-pre-training of Llama-3.2-3B on a mix of \U0001F4D0 FineMath (our new high quality math dataset) and FineWeb-Edu.\n\nThe model demonstrates superior math performance compared to Llama 3.2 3B, while maintaining similar performance on knowledge, reasoning, and common sense benchmarks.\nIt was trained on 160B tokens using a mix of 40% FineWeb-Edu and 60% from FineMath (30% FineMath-4+ subset and 30% InfiWebMath-4+ subset). 
We use nanotron for the training, and you can find the training scripts in our SmolLM2 GitHub repo.\n" overrides: parameters: model: FineMath-Llama-3B-Q4_K_M.gguf @@ -1387,8 +1383,8 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf -- &qwen25 ## Qwen2.5 - name: "qwen2.5-14b-instruct" +- &qwen25 + name: "qwen2.5-14b-instruct" ## Qwen2.5 icon: https://avatars.githubusercontent.com/u/141221163 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" license: apache-2.0 @@ -3291,15 +3287,7 @@ urls: - https://huggingface.co/Krystalan/DRT-o1-14B - https://huggingface.co/bartowski/DRT-o1-14B-GGUF - description: | - This repository contains the resources for our paper "DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought" - In this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end, - - 🌟 We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought. - 🌟 We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total. - 🌟 We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones. - - Our goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction. + description: "This repository contains the resources for our paper \"DRT-o1: Optimized Deep Reasoning Translation via Long Chain-of-Thought\"\nIn this work, we introduce DRT-o1, an attempt to bring the success of long thought reasoning to neural machine translation (MT). To this end,\n\n\U0001F31F We mine English sentences with similes or metaphors from existing literature books, which are suitable for translation via long thought.\n\U0001F31F We propose a designed multi-agent framework with three agents (i.e., a translator, an advisor and an evaluator) to synthesize the MT samples with long thought. There are 22,264 synthesized samples in total.\n\U0001F31F We train DRT-o1-8B, DRT-o1-7B and DRT-o1-14B using Llama-3.1-8B-Instruct, Qwen2.5-7B-Instruct and Qwen2.5-14B-Instruct as backbones.\n\nOur goal is not to achieve competitive performance with OpenAI’s O1 in neural machine translation (MT). Instead, we explore technical routes to bring the success of long thought to MT. 
To this end, we introduce DRT-o1, a byproduct of our exploration, and we hope it could facilitate the corresponding research in this direction.\n" overrides: parameters: model: DRT-o1-14B-Q4_K_M.gguf @@ -3356,8 +3344,8 @@ - filename: Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf sha256: 0fec82625f74a9a340837de7af287b1d9042e5aeb70cda2621426db99958b0af uri: huggingface://bartowski/Chuluun-Qwen2.5-72B-v0.08-GGUF/Chuluun-Qwen2.5-72B-v0.08-Q4_K_M.gguf -- &smollm ## SmolLM - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- &smollm + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## SmolLM name: "smollm-1.7b-instruct" icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png tags: @@ -3421,19 +3409,19 @@ - https://huggingface.co/nbeerbower/Dumpling-Qwen2.5-32B - https://huggingface.co/bartowski/Dumpling-Qwen2.5-32B-GGUF description: | - nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on: - nbeerbower/GreatFirewall-DPO - nbeerbower/Schule-DPO - nbeerbower/Purpura-DPO - nbeerbower/Arkhaios-DPO - jondurbin/truthy-dpo-v0.1 - antiven0m/physical-reasoning-dpo - flammenai/Date-DPO-NoAsterisks - flammenai/Prude-Phi3-DPO - Atsunori/HelpSteer2-DPO - jondurbin/gutenberg-dpo-v0.1 - nbeerbower/gutenberg2-dpo - nbeerbower/gutenberg-moderne-dpo. + nbeerbower/Rombos-EVAGutenberg-TIES-Qwen2.5-32B finetuned on: + nbeerbower/GreatFirewall-DPO + nbeerbower/Schule-DPO + nbeerbower/Purpura-DPO + nbeerbower/Arkhaios-DPO + jondurbin/truthy-dpo-v0.1 + antiven0m/physical-reasoning-dpo + flammenai/Date-DPO-NoAsterisks + flammenai/Prude-Phi3-DPO + Atsunori/HelpSteer2-DPO + jondurbin/gutenberg-dpo-v0.1 + nbeerbower/gutenberg2-dpo + nbeerbower/gutenberg-moderne-dpo. overrides: parameters: model: Dumpling-Qwen2.5-32B-Q4_K_M.gguf @@ -3474,8 +3462,8 @@ - filename: OpenThinker-7B-Q4_K_M.gguf sha256: 94dff1a7acd685db5cff7afdb837aab8172e06d65fe6179ba47428e3030acd93 uri: huggingface://bartowski/OpenThinker-7B-GGUF/OpenThinker-7B-Q4_K_M.gguf -- &llama31 ## LLama3.1 - url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" +- &llama31 + url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 name: "meta-llama-3.1-8b-instruct" license: llama3.1 @@ -5401,8 +5389,8 @@ - filename: Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf sha256: 908e6ce19f7cd3d7394bd7c38e43de2f228aca6aceda35c7ee70d069ad60493e uri: huggingface://bartowski/Selene-1-Mini-Llama-3.1-8B-GGUF/Selene-1-Mini-Llama-3.1-8B-Q4_K_M.gguf -- &deepseek ## Deepseek - url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" +- &deepseek + url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" ## Deepseek name: "deepseek-coder-v2-lite-instruct" icon: "https://avatars.githubusercontent.com/u/148330874" license: deepseek @@ -5466,8 +5454,8 @@ - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf -- &deepseek-r1 ## Start DeepSeek-R1 - url: "github:mudler/LocalAI/gallery/deepseek-r1.yaml@master" +- &deepseek-r1 + url: "github:mudler/LocalAI/gallery/deepseek-r1.yaml@master" ## Start DeepSeek-R1 name: "deepseek-r1-distill-qwen-1.5b" icon: "https://avatars.githubusercontent.com/u/148330874" urls: @@ -5607,8 +5595,8 @@ - filename: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf sha256: 16f1fb6bf76bb971a7a63e1a68cddd09421f4a767b86eec55eed1e08178f78f2 uri: 
huggingface://bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF/FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf -- &qwen2 ## Start QWEN2 - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- &qwen2 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2 name: "qwen2-7b-instruct" icon: https://avatars.githubusercontent.com/u/141221163 license: apache-2.0 @@ -5991,10 +5979,10 @@ sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1 uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf - filename: minicpm-v-2_6-mmproj-f16.gguf - sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0 uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf -- &mistral03 ## START Mistral - url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" + sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd +- &mistral03 + url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" ## START Mistral name: "mistral-7b-instruct-v0.3" icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png license: apache-2.0 @@ -6625,8 +6613,8 @@ - filename: Wayfarer-12B-Q4_K_M.gguf sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf -- &mudler ### START mudler's LocalAI specific-models - url: "github:mudler/LocalAI/gallery/mudler.yaml@master" +- &mudler + url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" license: llama3 @@ -6670,8 +6658,8 @@ - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin -- &parler-tts ### START parler-tts - url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" +- &parler-tts + url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" ### START parler-tts name: parler-tts-mini-v0.1 overrides: parameters: @@ -6687,8 +6675,8 @@ - cpu - text-to-speech - python -- &rerankers ### START rerankers - url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" +- &rerankers + url: "github:mudler/LocalAI/gallery/rerankers.yaml@master" ### START rerankers name: cross-encoder parameters: model: cross-encoder @@ -8939,8 +8927,8 @@ - filename: Copus-2x8B.i1-Q4_K_M.gguf sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5 uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf -- &yi-chat ### Start Yi - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- &yi-chat + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### Start Yi icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg" name: "yi-1.5-9b-chat" license: apache-2.0 @@ -9150,8 +9138,8 @@ - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf -- &noromaid ### Start noromaid - url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" +- &noromaid + url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" ### Start noromaid name: "noromaid-13b-0.4-DPO" icon: 
https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png license: cc-by-nc-4.0 @@ -9170,8 +9158,8 @@ - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1 uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf -- &wizardlm2 ### START Vicuna based - url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" +- &wizardlm2 + url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master" ### START Vicuna based name: "wizardlm2-7b" description: | We introduce and opensource WizardLM-2, our next generation state-of-the-art large language models, which have improved performance on complex chat, multilingual, reasoning and agent. New family includes three cutting-edge models: WizardLM-2 8x22B, WizardLM-2 70B, and WizardLM-2 7B. @@ -9225,8 +9213,8 @@ - filename: moondream2-mmproj-f16.gguf sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf -- &llava ### START LLaVa - name: "llava-1.6-vicuna" +- &llava + name: "llava-1.6-vicuna" ### START LLaVa icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png url: "github:mudler/LocalAI/gallery/llava.yaml@master" license: apache-2.0 @@ -9639,8 +9627,8 @@ sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf - filename: minicpm-llama3-mmproj-f16.gguf - sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf + sha256: 2c2d773537faf6a7e093655d0d5e14801ef0b2121c6c3e1981ce094c2b62f4f9 - !!merge <<: *llama3 name: "llama-3-cursedstock-v1.8-8b-iq-imatrix" urls: @@ -10082,8 +10070,8 @@ - filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0 uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf -- &chatml ### ChatML - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" +- &chatml + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ### ChatML name: "una-thepitbull-21.4b-v2" license: afl-3.0 icon: https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2/resolve/main/DE-UNA-ThePitbull-21.4B-v2.png @@ -10367,8 +10355,8 @@ - filename: Triangulum-10B.Q4_K_M.gguf sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf -- &command-R ### START Command-r - url: "github:mudler/LocalAI/gallery/command-r.yaml@master" +- &command-R + url: "github:mudler/LocalAI/gallery/command-r.yaml@master" ### START Command-r name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" icon: https://cdn.sanity.io/images/rjtqmwfu/production/ae020d94b599cc453cc09ebc80be06d35d953c23-102x18.svg @@ -10422,8 +10410,8 @@ - filename: "aya-23-35B-Q4_K_M.gguf" sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d" uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf" -- &phi-2-chat ### START Phi-2 - url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" +- &phi-2-chat + url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master" ### START Phi-2 license: mit description: | Phi-2 fine-tuned by the OpenHermes 2.5 dataset optimised for multi-turn conversation and character 
impersonation. @@ -10544,8 +10532,8 @@ - filename: internlm3-8b-instruct-Q4_K_M.gguf uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e -- &phi-3 ### START Phi-3 - url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" +- &phi-3 + url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master" ### START Phi-3 name: "phi-3-mini-4k-instruct" icon: https://avatars.githubusercontent.com/u/6154722 license: mit @@ -10744,8 +10732,8 @@ - filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36 uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf -- &hermes-2-pro-mistral ### START Hermes - url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" +- &hermes-2-pro-mistral + url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" ### START Hermes name: "hermes-2-pro-mistral" icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png license: apache-2.0 @@ -11080,8 +11068,8 @@ - filename: "galatolo-Q4_K.gguf" sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172" uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf" -- &codellama ### START Codellama - url: "github:mudler/LocalAI/gallery/codellama.yaml@master" +- &codellama + url: "github:mudler/LocalAI/gallery/codellama.yaml@master" ### START Codellama name: "codellama-7b" license: llama2 description: | @@ -11211,8 +11199,8 @@ - filename: "llm-compiler-7b-ftd.Q4_K.gguf" uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf" sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8 -- &openvino ### START OpenVINO - url: "github:mudler/LocalAI/gallery/openvino.yaml@master" +- &openvino + url: "github:mudler/LocalAI/gallery/openvino.yaml@master" ### START OpenVINO name: "openvino-llama-3-8b-instruct-ov-int8" license: llama3 urls: @@ -11325,8 +11313,8 @@ - gpu - embedding - cpu -- &sentencentransformers ### START Embeddings - description: | +- &sentencentransformers + description: | ### START Embeddings This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar text are closer and can efficiently be found using cosine similarity. 
urls: - https://github.com/UKPLab/sentence-transformers @@ -11340,8 +11328,8 @@ overrides: parameters: model: all-MiniLM-L6-v2 -- &dreamshaper ### START Image generation - name: dreamshaper +- &dreamshaper + name: dreamshaper ### START Image generation icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg license: other description: | @@ -11538,8 +11526,8 @@ - filename: t5xxl_fp16.safetensors sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors -- &whisper ## Whisper - url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" +- &whisper + url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" ## Whisper name: "whisper-1" icon: https://avatars.githubusercontent.com/u/14957082 license: "MIT" @@ -11720,8 +11708,8 @@ Stable Diffusion in NCNN with c++, supported txt2img and img2img name: stablediffusion-cpp icon: https://avatars.githubusercontent.com/u/100950301 -- &piper ## Piper TTS - url: github:mudler/LocalAI/gallery/piper.yaml@master +- &piper + url: github:mudler/LocalAI/gallery/piper.yaml@master ## Piper TTS name: voice-en-us-kathleen-low icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png license: mit From af41436f1bf40fca937990ae6bede9dd3f6f0cd0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 Jan 2025 09:57:58 +0100 Subject: [PATCH 38/85] fix(tests): pin to branch for config used in tests (#4721) Signed-off-by: Ettore Di Giacinto --- core/config/backend_config_test.go | 4 ++-- core/http/app_test.go | 4 ++-- docs/content/docs/features/model-gallery.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/config/backend_config_test.go b/core/config/backend_config_test.go index 04eacb7e..e6a54b89 100644 --- a/core/config/backend_config_test.go +++ b/core/config/backend_config_test.go @@ -48,9 +48,9 @@ parameters: Expect(config.Name).To(Equal("bar-baz")) Expect(config.Validate()).To(BeTrue()) - // download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml + // download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml httpClient := http.Client{} - resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml") + resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml") Expect(err).To(BeNil()) defer resp.Body.Close() tmp, err = os.CreateTemp("", "config.yaml") diff --git a/core/http/app_test.go b/core/http/app_test.go index f57a3ea7..bc4ecfae 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -476,7 +476,7 @@ var _ = Describe("API test", func() { }) It("apply models from config", func() { response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) @@ -600,7 +600,7 @@ var _ = Describe("API test", func() { modelName := 
"hermes-2-pro-mistral" response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml", + ConfigURL: "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml", }) Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) diff --git a/docs/content/docs/features/model-gallery.md b/docs/content/docs/features/model-gallery.md index c17a5946..6943866a 100644 --- a/docs/content/docs/features/model-gallery.md +++ b/docs/content/docs/features/model-gallery.md @@ -134,12 +134,12 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ }' ``` -An example that installs openllama can be: +An example that installs hermes-2-pro-mistral can be: ```bash LOCALAI=http://localhost:8080 curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{ - "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml" + "config_url": "https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml" }' ``` From 7badaf78a0e283a6dc259fd204ba8b76b9f53dc7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 Jan 2025 12:31:46 +0100 Subject: [PATCH 39/85] chore: :arrow_up: Update ggerganov/llama.cpp to `8b576b6c55bc4e6be898b47522f0ef402b93ef62` (#4722) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b903d7d..0f91a5db 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=eb7cf15a808d4d7a71eef89cc6a9b96fe82989dc +CPPLLAMA_VERSION?=8b576b6c55bc4e6be898b47522f0ef402b93ef62 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From ff07612bfa504bc25faf6c34bb901b7c9409509c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 Jan 2025 14:45:42 +0100 Subject: [PATCH 40/85] chore(model gallery): add mistral-small-24b-instruct-2501 (#4725) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 98c3a782..f509d343 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6613,6 +6613,23 @@ - filename: Wayfarer-12B-Q4_K_M.gguf sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08 uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "mistral-small-24b-instruct-2501" + urls: + - https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501 + - https://huggingface.co/bartowski/Mistral-Small-24B-Instruct-2501-GGUF + description: | + Mistral Small 3 ( 2501 ) sets a new benchmark in the "small" Large Language Models category below 70B, boasting 24B parameters and achieving state-of-the-art capabilities comparable to larger models! + This model is an instruction-fine-tuned version of the base model: Mistral-Small-24B-Base-2501. 
+ + Mistral Small can be deployed locally and is exceptionally "knowledge-dense", fitting in a single RTX 4090 or a 32GB RAM MacBook once quantized. + overrides: + parameters: + model: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf + files: + - filename: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf + sha256: d1a6d049f09730c3f8ba26cf6b0b60c89790b5fdafa9a59c819acdfe93fffd1b + uri: huggingface://bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf - &mudler url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models name: "LocalAI-llama3-8b-function-call-v0.2" From e0d90b173b5af15386c96f450822fdb3617b1c4e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 Jan 2025 14:49:02 +0100 Subject: [PATCH 41/85] chore(model gallery): add tinyswallow-1.5b-instruct (#4726) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index f509d343..e9200537 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3462,6 +3462,20 @@ - filename: OpenThinker-7B-Q4_K_M.gguf sha256: 94dff1a7acd685db5cff7afdb837aab8172e06d65fe6179ba47428e3030acd93 uri: huggingface://bartowski/OpenThinker-7B-GGUF/OpenThinker-7B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tinyswallow-1.5b-instruct" + urls: + - https://huggingface.co/SakanaAI/TinySwallow-1.5B-Instruct + - https://huggingface.co/bartowski/TinySwallow-1.5B-Instruct-GGUF + description: | + TinySwallow-1.5B-Instruct is an instruction-tuned version of TinySwallow-1.5B, created through TAID (Temporally Adaptive Interpolated Distillation), our new knowledge distillation method. We used Qwen2.5-32B-Instruct as the teacher model and Qwen2.5-1.5B-Instruct as the student model. The model has been further instruction-tuned to enhance its ability to follow instructions and engage in conversations in Japanese. + overrides: + parameters: + model: TinySwallow-1.5B-Instruct-Q4_K_M.gguf + files: + - filename: TinySwallow-1.5B-Instruct-Q4_K_M.gguf + sha256: 4d409c8873c1650a19c0a7a1c051e342613191a487768fe0d29735b9361079cd + uri: huggingface://bartowski/TinySwallow-1.5B-Instruct-GGUF/TinySwallow-1.5B-Instruct-Q4_K_M.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 From f1763aabf22da70552e1bc0a100444ba0b64496e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 Jan 2025 14:53:39 +0100 Subject: [PATCH 42/85] chore(model gallery): add taid-llm-1.5b (#4727) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index e9200537..c6d2ba61 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5995,6 +5995,21 @@ - filename: minicpm-v-2_6-mmproj-f16.gguf uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf sha256: 4485f68a0f1aa404c391e788ea88ea653c100d8e98fe572698f701e5809711fd +- !!merge <<: *qwen2 + name: "taid-llm-1.5b" + icon: https://sakana.ai/assets/taid-jp/cover_large.jpeg + urls: + - https://huggingface.co/SakanaAI/TAID-LLM-1.5B + - https://huggingface.co/bartowski/TAID-LLM-1.5B-GGUF + description: | + TAID-LLM-1.5B is an English language model created through TAID (Temporally Adaptive Interpolated Distillation), our new knowledge distillation method. We used Qwen2-72B-Instruct as the teacher model and Qwen2-1.5B-Instruct as the student model. 
+ overrides: + parameters: + model: TAID-LLM-1.5B-Q4_K_M.gguf + files: + - filename: TAID-LLM-1.5B-Q4_K_M.gguf + sha256: dbffc989d12d42ef8e4a2994e102d7ec7a02c49ec08ea2e35426372ad07b4cd8 + uri: huggingface://bartowski/TAID-LLM-1.5B-GGUF/TAID-LLM-1.5B-Q4_K_M.gguf - &mistral03 url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" ## START Mistral name: "mistral-7b-instruct-v0.3" From 732042e5c66ab077f515805c44615dcbe26189ef Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 Jan 2025 23:31:00 +0100 Subject: [PATCH 43/85] chore: :arrow_up: Update ggerganov/llama.cpp to `aa6fb1321333fae8853d0cdc26bcb5d438e650a1` (#4728) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f91a5db..ac32a37b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=8b576b6c55bc4e6be898b47522f0ef402b93ef62 +CPPLLAMA_VERSION?=aa6fb1321333fae8853d0cdc26bcb5d438e650a1 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From ba2f426e3e03615a73f612ba2c21e87923d4cad1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Feb 2025 10:12:15 +0100 Subject: [PATCH 44/85] chore(model gallery): add fuseo1-deekseekr1-qwq-skyt1-32b-preview (#4731) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index c6d2ba61..a3f90ca4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5609,6 +5609,20 @@ - filename: FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf sha256: 16f1fb6bf76bb971a7a63e1a68cddd09421f4a767b86eec55eed1e08178f78f2 uri: huggingface://bartowski/FuseO1-DeepSeekR1-QwQ-32B-Preview-GGUF/FuseO1-DeepSeekR1-QwQ-32B-Preview-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "fuseo1-deekseekr1-qwq-skyt1-32b-preview" + urls: + - https://huggingface.co/FuseAI/FuseO1-DeepSeekR1-QwQ-SkyT1-32B-Preview + - https://huggingface.co/bartowski/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-GGUF + description: | + FuseO1-Preview is our initial endeavor to enhance the System-II reasoning capabilities of large language models (LLMs) through innovative model fusion techniques. By employing our advanced SCE merging methodologies, we integrate multiple open-source o1-like LLMs into a unified model. Our goal is to incorporate the distinct knowledge and strengths from different reasoning LLMs into a single, unified model with strong System-II reasoning abilities, particularly in mathematics, coding, and science domains. 
+ overrides:
+ parameters:
+ model: FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
+ files:
+ - filename: FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
+ sha256: 13911dd4a62d4714a3447bc288ea9d49dbe575a91cab9e8f645057f1d8e1100e
+ uri: huggingface://bartowski/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-GGUF/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf
- &qwen2
 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2
 name: "qwen2-7b-instruct"

From d79f02ea0953644ef8bf1c422765a7d7a7c15c6d Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 1 Feb 2025 22:45:26 +0100
Subject: [PATCH 45/85] chore: :arrow_up: Update ggerganov/llama.cpp to
 `53debe6f3c9cca87e9520a83ee8c14d88977afa4` (#4732)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ac32a37b..b97a8940 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=aa6fb1321333fae8853d0cdc26bcb5d438e650a1
+CPPLLAMA_VERSION?=53debe6f3c9cca87e9520a83ee8c14d88977afa4

 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp

From 1d6afbd65d24b46c74f71f4b593f359efb54bae3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 2 Feb 2025 13:25:03 +0100
Subject: [PATCH 46/85] feat(llama.cpp): Add support for grammar triggers
 (#4733)

Signed-off-by: Ettore Di Giacinto
---
 backend/backend.proto | 7 +++++++
 backend/cpp/llama/grpc-server.cpp | 20 ++++++++++++++++++++
 core/backend/options.go | 10 ++++++++++
 pkg/functions/parse.go | 10 +++++++++-
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/backend/backend.proto b/backend/backend.proto
index fea4214f..bd75adc5 100644
--- a/backend/backend.proto
+++ b/backend/backend.proto
@@ -163,6 +163,11 @@ message Reply {
 double timing_token_generation = 5;
 }

+message GrammarTrigger {
+ string word = 1;
+ bool at_start = 2;
+}
+
 message ModelOptions {
 string Model = 1;
 int32 ContextSize = 2;
@@ -247,6 +252,8 @@ message ModelOptions {

 string CacheTypeKey = 63;
 string CacheTypeValue = 64;
+
+ repeated GrammarTrigger GrammarTriggers = 65;
 }

 message Result {
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 9aeb34db..1e9a3551 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -468,6 +468,9 @@ struct llama_server_context
 bool add_bos_token = true;
 bool has_eos_token = true;

+ bool grammar_lazy = false;
+ std::vector<common_grammar_trigger> grammar_trigger_words;
+
 int32_t n_ctx; // total context for all clients / slots

 // system prompt
@@ -706,6 +709,8 @@
 slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
 slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
 slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
+ slot->sparams.grammar_trigger_words = grammar_trigger_words;
+ slot->sparams.grammar_lazy = grammar_lazy;

 if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
 // Might be better to reject the request with a 400 ?
@@ -2374,6 +2379,21 @@ static void params_parse(const backend::ModelOptions* request, if ( request->ropefreqscale() != 0.0f ) { params.rope_freq_scale = request->ropefreqscale(); } + + if (request->grammartriggers_size() > 0) { + LOG_INFO("configuring grammar triggers", {}); + llama.grammar_lazy = true; + for (int i = 0; i < request->grammartriggers_size(); i++) { + common_grammar_trigger trigger; + trigger.word = request->grammartriggers(i).word(); + trigger.at_start = request->grammartriggers(i).at_start(); + llama.grammar_trigger_words.push_back(trigger); + LOG_INFO("grammar trigger", { + { "word", trigger.word }, + { "at_start", trigger.at_start } + }); + } + } } diff --git a/core/backend/options.go b/core/backend/options.go index 92a42893..3201142d 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -118,9 +118,19 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions { nGPULayers = *c.NGPULayers } + triggers := make([]*pb.GrammarTrigger, 0) + for _, t := range c.FunctionsConfig.GrammarConfig.GrammarTriggers { + triggers = append(triggers, &pb.GrammarTrigger{ + Word: t.Word, + AtStart: t.AtStart, + }) + + } + return &pb.ModelOptions{ CUDA: c.CUDA || c.Diffusers.CUDA, SchedulerType: c.Diffusers.SchedulerType, + GrammarTriggers: triggers, PipelineType: c.Diffusers.PipelineType, CFGScale: c.CFGScale, LoraAdapter: c.LoraAdapter, diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 50cbb27b..30338ffd 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -47,6 +47,14 @@ type GrammarConfig struct { // SchemaType can be configured to use a specific schema type to force the grammar // available : json, llama3.1 SchemaType string `yaml:"schema_type"` + + GrammarTriggers []GrammarTrigger `yaml:"triggers"` +} + +type GrammarTrigger struct { + // Trigger is the string that triggers the grammar + Word string `yaml:"word"` + AtStart bool `yaml:"at_start"` } // FunctionsConfig is the configuration for the tool/function call. 
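// A minimal sketch of how the new fields above could be exercised from a
// model config (the enclosing `grammar:` key and the trigger word below are
// illustrative assumptions, not taken verbatim from this patch):
//
//	grammar:
//	  triggers:
//	    - word: "<tool_call>"
//	      at_start: true
//
// With triggers configured, the server enables grammar_lazy (see the
// grpc-server.cpp hunk above) and only starts enforcing the grammar once one
// of the trigger words appears in the generated text.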
@@ -361,6 +369,6 @@ func ParseFunctionCallArgs(functionArguments string, functionConfig FunctionsCon
 }

 jsonBytes, _ := json.Marshal(args)
-
+
 return string(jsonBytes)
 }

From 03974a4dd456d83f51ccccf6aef486cda71741ce Mon Sep 17 00:00:00 2001
From: Shraddha
Date: Sun, 2 Feb 2025 23:09:43 +0530
Subject: [PATCH 47/85] feat: tokenization with llama.cpp (#4724)

feat: tokenization

Signed-off-by: shraddhazpy
---
 backend/cpp/llama/grpc-server.cpp | 12 ++++++++++++
 core/backend/tokenize.go | 11 +++++------
 core/http/endpoints/localai/tokenize.go | 5 ++---
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index 1e9a3551..4daf84c6 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -2542,6 +2542,18 @@ public:
 return grpc::Status::OK;
 }

+ grpc::Status TokenizeString(ServerContext* context, const backend::PredictOptions* request, backend::TokenizationResponse* response){
+ json data = parse_options(false, request, llama);
+
+ std::vector<llama_token> tokens = llama.tokenize(data["prompt"],false);
+
+ for (int i=0 ; i< tokens.size(); i++){
+ response->add_tokens(tokens[i]);
+ }
+
+ return grpc::Status::OK;
+ }
+
 grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
 llama_client_slot* active_slot = llama.get_active_slot();

diff --git a/core/backend/tokenize.go b/core/backend/tokenize.go
index 2f813e18..1783083b 100644
--- a/core/backend/tokenize.go
+++ b/core/backend/tokenize.go
@@ -16,12 +16,7 @@ func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.Bac

 opts := ModelOptions(backendConfig, appConfig, model.WithModel(modelFile))

- if backendConfig.Backend == "" {
- inferenceModel, err = loader.Load(opts...)
- } else {
- opts = append(opts, model.WithBackendString(backendConfig.Backend))
- inferenceModel, err = loader.Load(opts...)
- }
+ inferenceModel, err = loader.Load(opts...)
 if err != nil {
 return schema.TokenizeResponse{}, err
 }
@@ -35,6 +30,10 @@
 return schema.TokenizeResponse{}, err
 }

+ if resp.Tokens == nil {
+ resp.Tokens = make([]int32, 0)
+ }
+
 return schema.TokenizeResponse{
 Tokens: resp.Tokens,
 }, nil
diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go
index da110bf8..faa8a0a4 100644
--- a/core/http/endpoints/localai/tokenize.go
+++ b/core/http/endpoints/localai/tokenize.go
@@ -12,6 +12,7 @@ import (

 // TokenizeEndpoint exposes a REST API to tokenize the content
 // @Summary Tokenize the input.
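// As documented by the annotations below, the endpoint takes a JSON body
// with "model" and "content" fields and replies with a JSON object holding
// a "tokens" array of token IDs, e.g. (illustrative values only):
//
//	POST /v1/tokenize {"model": "my-model", "content": "hello"}
//	=> {"tokens": [15339]}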
+// @Param request body schema.TokenizeRequest true "Request" // @Success 200 {object} schema.TokenizeResponse "Response" // @Router /v1/tokenize [post] func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { @@ -51,8 +52,6 @@ func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, app return err } - c.JSON(tokenResponse) - return nil - + return c.JSON(tokenResponse) } } From a37fa8d9c44bffa5df6bf442c5c8a54a639bcef3 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Feb 2025 23:18:30 +0100 Subject: [PATCH 48/85] chore: :arrow_up: Update ggerganov/llama.cpp to `90f9b88afb6447d3929843a2aa98c0f11074762d` (#4736) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b97a8940..3e9446b4 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=53debe6f3c9cca87e9520a83ee8c14d88977afa4 +CPPLLAMA_VERSION?=90f9b88afb6447d3929843a2aa98c0f11074762d # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 52fadeded128c4e06bd7b72d4b64db7e58089cd3 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 3 Feb 2025 10:16:42 +0100 Subject: [PATCH 49/85] feat(swagger): update swagger (#4735) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 22 ++++++++++++++++++++++ swagger/swagger.json | 22 ++++++++++++++++++++++ swagger/swagger.yaml | 14 ++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/swagger/docs.go b/swagger/docs.go index 43bc8822..f1050e85 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -765,6 +765,17 @@ const docTemplate = `{ "/v1/tokenize": { "post": { "summary": "Tokenize the input.", + "parameters": [ + { + "description": "Request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TokenizeRequest" + } + } + ], "responses": { "200": { "description": "Response", @@ -1838,6 +1849,17 @@ const docTemplate = `{ } } }, + "schema.TokenizeRequest": { + "type": "object", + "properties": { + "content": { + "type": "string" + }, + "model": { + "type": "string" + } + } + }, "schema.TokenizeResponse": { "type": "object", "properties": { diff --git a/swagger/swagger.json b/swagger/swagger.json index 7d39e5e9..b2d02ea2 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -758,6 +758,17 @@ "/v1/tokenize": { "post": { "summary": "Tokenize the input.", + "parameters": [ + { + "description": "Request", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TokenizeRequest" + } + } + ], "responses": { "200": { "description": "Response", @@ -1831,6 +1842,17 @@ } } }, + "schema.TokenizeRequest": { + "type": "object", + "properties": { + "content": { + "type": "string" + }, + "model": { + "type": "string" + } + } + }, "schema.TokenizeResponse": { "type": "object", "properties": { diff --git a/swagger/swagger.yaml 
b/swagger/swagger.yaml index e747464f..e7b9e625 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -705,6 +705,13 @@ definitions: description: voice audio file or speaker id type: string type: object + schema.TokenizeRequest: + properties: + content: + type: string + model: + type: string + type: object schema.TokenizeResponse: properties: tokens: @@ -1216,6 +1223,13 @@ paths: summary: Get TokenMetrics for Active Slot. /v1/tokenize: post: + parameters: + - description: Request + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TokenizeRequest' responses: "200": description: Response From ed0094c3d05c9e598d6b1c324115304e5bb4569f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Feb 2025 10:30:07 +0100 Subject: [PATCH 50/85] chore(model gallery): add steelskull_l3.3-damascus-r1 (#4737) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a3f90ca4..dfa328f9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5623,6 +5623,35 @@ - filename: FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf sha256: 13911dd4a62d4714a3447bc288ea9d49dbe575a91cab9e8f645057f1d8e1100e uri: huggingface://bartowski/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-GGUF/FuseO1-DeekSeekR1-QwQ-SkyT1-32B-Preview-Q4_K_M.gguf +- !!merge <<: *deepseek-r1 + name: "steelskull_l3.3-damascus-r1" + icon: https://cdn-uploads.huggingface.co/production/uploads/64545af5ec40bbbd01242ca6/iIzpqHDb9wU181AzfrjZy.png + urls: + - https://huggingface.co/Steelskull/L3.3-Damascus-R1 + - https://huggingface.co/bartowski/Steelskull_L3.3-Damascus-R1-GGUF + description: | + Damascus-R1 builds upon some elements of the Nevoria foundation but represents a significant step forward with a completely custom-made DeepSeek R1 Distill base: Hydroblated-R1-V3. Constructed using the new SCE (Select, Calculate, and Erase) merge method, Damascus-R1 prioritizes stability, intelligence, and enhanced awareness. + + Technical Architecture + Leveraging the SCE merge method and custom base, Damascus-R1 integrates newly added specialized components from multiple high-performance models: + EVA and EURYALE foundations for creative expression and scene comprehension + Cirrus and Hanami elements for enhanced reasoning capabilities + Anubis components for detailed scene description + Negative_LLAMA integration for balanced perspective and response + + Core Philosophy + Damascus-R1 embodies the principle that AI models can be intelligent and be fun. This version specifically addresses recent community feedback and iterates on prior experiments, optimizing the balance between technical capability and natural conversation flow. + + Base Architecture + At its core, Damascus-R1 utilizes the entirely custom Hydroblated-R1 base model, specifically engineered for stability, enhanced reasoning, and performance. The SCE merge method, with settings finely tuned based on community feedback from evaluations of Experiment-Model-Ver-A, L3.3-Exp-Nevoria-R1-70b-v0.1 and L3.3-Exp-Nevoria-70b-v0.1, enables precise and effective component integration while maintaining model coherence and reliability. 
+ overrides: + parameters: + model: Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf + files: + - filename: Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf + sha256: f1df5808b2099b26631d0bae870603a08dbfab6813471f514035d3fb92a47480 + uri: huggingface://bartowski/Steelskull_L3.3-Damascus-R1-GGUF/Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf + - &qwen2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2 name: "qwen2-7b-instruct" From 41a2dfb0d9abe0a9a7bd8139e38c4847ac64f42f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Feb 2025 10:37:24 +0100 Subject: [PATCH 51/85] chore(model gallery): add thedrummer_gemmasutra-pro-27b-v1.1 (#4738) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index dfa328f9..f3ce76da 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -7518,6 +7518,21 @@ - filename: GWQ-9B-Preview2-Q4_K_M.gguf sha256: 04da51cdb17c7e51594f6daac595161a46298b48ab5e568a85e65541d10a861f uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf +- !!merge <<: *gemma + name: "thedrummer_gemmasutra-pro-27b-v1.1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/SrHUGXD_dp55pobeJK36t.png + urls: + - https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1.1 + - https://huggingface.co/bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF + description: | + A Gemmasutra tune with modern techniques. Au Revoir, Gemma! + overrides: + parameters: + model: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf + files: + - filename: TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf + sha256: 218a14f0bf8266f9e77d16b8b4f5cc1dc76e97eb582a2c97cca5a3a2c35de86b + uri: huggingface://bartowski/TheDrummer_Gemmasutra-Pro-27B-v1.1-GGUF/TheDrummer_Gemmasutra-Pro-27B-v1.1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 From 051faaf771c17fd37fd19d999e160f8a293ae481 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Feb 2025 10:46:47 +0100 Subject: [PATCH 52/85] chore(model gallery): add uncensoredai_uncensoredlm-deepseek-r1-distill-qwen-14b (#4739) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index f3ce76da..c71e7425 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -5651,7 +5651,21 @@ - filename: Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf sha256: f1df5808b2099b26631d0bae870603a08dbfab6813471f514035d3fb92a47480 uri: huggingface://bartowski/Steelskull_L3.3-Damascus-R1-GGUF/Steelskull_L3.3-Damascus-R1-Q4_K_M.gguf - +- !!merge <<: *deepseek-r1 + name: "uncensoredai_uncensoredlm-deepseek-r1-distill-qwen-14b" + icon: https://huggingface.co/uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B/resolve/main/h5dTflRHYMbGq3RXm9a61yz4io.avif + urls: + - https://huggingface.co/uncensoredai/UncensoredLM-DeepSeek-R1-Distill-Qwen-14B + - https://huggingface.co/bartowski/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-GGUF + description: | + An UncensoredLLM with Reasoning, what more could you want? 
+ overrides: + parameters: + model: uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf + files: + - filename: uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf + sha256: 85b2c3e1aa4e8cc3bf616f84c7595c963d5439f3fcfdbd5c957fb22e84d10b1c + uri: huggingface://bartowski/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-GGUF/uncensoredai_UncensoredLM-DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf - &qwen2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" ## Start QWEN2 name: "qwen2-7b-instruct" From d290fd159f7e41e2de75fe885bf1efd12ab5a88c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Feb 2025 15:55:49 +0100 Subject: [PATCH 53/85] chore(model gallery): add LocalAI-functioncall-llama3.2-1b-v0.4 (#4740) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 ++++++++++++- gallery/llama3.2-fcall.yaml | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 gallery/llama3.2-fcall.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index c71e7425..24b4d65f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -853,7 +853,7 @@ sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf - &llama32 - url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master" ## llama3.2 + url: "github:mudler/LocalAI/gallery/llama3.2-quantized.yaml@master" icon: https://avatars.githubusercontent.com/u/153379578 license: llama3.2 description: | @@ -1383,6 +1383,21 @@ - filename: FineMath-Llama-3B-Q4_K_M.gguf sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf +- !!merge <<: *llama32 + name: "LocalAI-functioncall-llama3.2-1b-v0.4" + url: "github:mudler/LocalAI/gallery/llama3.2-fcall.yaml@master" + urls: + - https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-1b-v0.4 + - https://huggingface.co/mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF + description: | + A model tailored to be conversational and execute function calls with LocalAI. This model is based on llama 3.2 and has 1B parameter. Perfect for small devices. + overrides: + parameters: + model: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf + files: + - filename: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf + sha256: 547e57c2d3f17c632c9fd303afdb00446e7396df453aee62633b76976c407616 + uri: huggingface://mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF/LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf - &qwen25 name: "qwen2.5-14b-instruct" ## Qwen2.5 icon: https://avatars.githubusercontent.com/u/141221163 diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml new file mode 100644 index 00000000..0188045e --- /dev/null +++ b/gallery/llama3.2-fcall.yaml @@ -0,0 +1,48 @@ +--- +name: "llama3.2-fcall" + +config_file: | + mmap: true + function: + json_regex_match: + - "(?s)(.*?)" + capture_llm_results: + - (?s)(.*?) + replace_llm_results: + - key: (?s)(.*?) 
+ value: ""
+ grammar:
+ properties_order: "name,arguments"
+ template:
+ chat: |
+ <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+ You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
+ {{.Input }}
+ <|start_header_id|>assistant<|end_header_id|>
+ chat_message: |
+ <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+ {{ if .FunctionCall -}}
+ {{ else if eq .RoleName "tool" -}}
+ {{ end -}}
+ {{ if .Content -}}
+ {{.Content -}}
+ {{ else if .FunctionCall -}}
+ {{ toJson .FunctionCall -}}
+ {{ end -}}
+ <|eot_id|>
+ completion: |
+ {{.Input}}
+ function: |
+ <|start_header_id|>system<|end_header_id|>
+ You are an AI assistant that executes function calls, and these are the tools at your disposal:
+ {{range .Functions}}
+ {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+ {{end}}
+ <|eot_id|>{{.Input}}<|start_header_id|>assistant<|end_header_id|>
+ context_size: 8192
+ f16: true
+ stopwords:
+ - <|im_end|>
+ - 
+ - "<|eot_id|>"
+ - <|end_of_text|>

From 431716d4d6e8b3529c3cfa5277e9bcd79964daa6 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 3 Feb 2025 16:10:33 +0100
Subject: [PATCH 54/85] fix(gallery): remove bos token from llama3.2-fcall

Signed-off-by: Ettore Di Giacinto
---
 gallery/llama3.2-fcall.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml
index 0188045e..5b0a53a1 100644
--- a/gallery/llama3.2-fcall.yaml
+++ b/gallery/llama3.2-fcall.yaml
@@ -15,7 +15,7 @@ config_file: |
 properties_order: "name,arguments"
 template:
 chat: |
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
+ <|start_header_id|>system<|end_header_id|>
 You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>
 {{.Input }}
 <|start_header_id|>assistant<|end_header_id|>

From c3c27b7e3d98a782a2f2d443a45ec7e41e2670f4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 3 Feb 2025 17:58:57 +0100
Subject: [PATCH 55/85] chore(model gallery): small fixups to llama3.2-fcall
 template

Signed-off-by: Ettore Di Giacinto
---
 gallery/llama3.2-fcall.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gallery/llama3.2-fcall.yaml b/gallery/llama3.2-fcall.yaml
index 5b0a53a1..73f370a8 100644
--- a/gallery/llama3.2-fcall.yaml
+++ b/gallery/llama3.2-fcall.yaml
@@ -13,6 +13,7 @@ config_file: |
 value: ""
 grammar:
 properties_order: "name,arguments"
+ function_arguments_key: "arguments"
 template:
 chat: |
 <|start_header_id|>system<|end_header_id|>

From df30d6a4824789ead1898bdcf59f9e1d31c2e1ed Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 3 Feb 2025 22:21:40 +0000
Subject: [PATCH 56/85] chore(deps): Bump GrantBirki/git-diff-action from 2.7.0
 to 2.8.0 (#4746)

Bumps [GrantBirki/git-diff-action](https://github.com/grantbirki/git-diff-action) from 2.7.0 to 2.8.0.
- [Release notes](https://github.com/grantbirki/git-diff-action/releases)
- [Commits](https://github.com/grantbirki/git-diff-action/compare/v2.7.0...v2.8.0)

---
updated-dependencies:
- dependency-name: GrantBirki/git-diff-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/notify-models.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/notify-models.yaml b/.github/workflows/notify-models.yaml index e17ee7fc..b84e10e0 100644 --- a/.github/workflows/notify-models.yaml +++ b/.github/workflows/notify-models.yaml @@ -18,7 +18,7 @@ jobs: with: model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface:///file" # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json @@ -99,7 +99,7 @@ jobs: docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done # Check the PR diff using the current branch and the base branch of the PR - - uses: GrantBirki/git-diff-action@v2.7.0 + - uses: GrantBirki/git-diff-action@v2.8.0 id: git-diff-action with: json_diff_file_output: diff.json From e3b943ffcb798dce642d5edb576d7cb8647d4f7f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 4 Feb 2025 08:56:11 +0100 Subject: [PATCH 57/85] chore: :arrow_up: Update ggerganov/llama.cpp to `5598f475be3e31430fbe17ebb85654ec90dc201e` (#4757) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3e9446b4..576a480b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=90f9b88afb6447d3929843a2aa98c0f11074762d +CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 5a19094d3a7ac310b424eeba30d13764f96ab36b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Feb 2025 08:56:51 +0100 Subject: [PATCH 58/85] chore(deps): Bump sentence-transformers from 3.4.0 to 3.4.1 in /backend/python/transformers (#4748) chore(deps): Bump sentence-transformers in /backend/python/transformers Bumps [sentence-transformers](https://github.com/UKPLab/sentence-transformers) from 3.4.0 to 3.4.1. - [Release notes](https://github.com/UKPLab/sentence-transformers/releases) - [Commits](https://github.com/UKPLab/sentence-transformers/compare/v3.4.0...v3.4.1) --- updated-dependencies: - dependency-name: sentence-transformers dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers/requirements-cpu.txt | 2 +- backend/python/transformers/requirements-cublas11.txt | 2 +- backend/python/transformers/requirements-cublas12.txt | 2 +- backend/python/transformers/requirements-hipblas.txt | 2 +- backend/python/transformers/requirements-intel.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt index 36dc973a..79863c2b 100644 --- a/backend/python/transformers/requirements-cpu.txt +++ b/backend/python/transformers/requirements-cpu.txt @@ -5,4 +5,4 @@ accelerate transformers bitsandbytes outetts -sentence-transformers==3.4.0 \ No newline at end of file +sentence-transformers==3.4.1 \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index a8b1c0c0..fa9f8953 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -6,4 +6,4 @@ accelerate transformers bitsandbytes outetts -sentence-transformers==3.4.0 +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index a54c4c88..127bfb21 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -5,4 +5,4 @@ numba==0.60.0 transformers bitsandbytes outetts -sentence-transformers==3.4.0 +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 73b7d85b..c0ca93ee 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -7,4 +7,4 @@ numba==0.60.0 bitsandbytes outetts bitsandbytes -sentence-transformers==3.4.0 +sentence-transformers==3.4.1 diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 5b677199..1418a3c3 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -8,4 +8,4 @@ numba==0.60.0 intel-extension-for-transformers bitsandbytes outetts -sentence-transformers==3.4.0 +sentence-transformers==3.4.1 From 96cb407ee03388c034bae6e91c240b1cbf577ed3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Feb 2025 08:57:19 +0100 Subject: [PATCH 59/85] chore(deps): Bump docs/themes/hugo-theme-relearn from `5bcb9fe` to `66bc366` (#4750) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `5bcb9fe` to `66bc366`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/5bcb9fe5e61d2fbe702034a24425992fd2455b0a...66bc366c4727a958f3873f409550daa36932c03f) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index 5bcb9fe5..66bc366c 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit 5bcb9fe5e61d2fbe702034a24425992fd2455b0a +Subproject commit 66bc366c4727a958f3873f409550daa36932c03f From 6a91288c8ccc344270ddf0a93e509b226dd51496 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Feb 2025 09:45:52 +0100 Subject: [PATCH 60/85] chore(model gallery): add fblgit_miniclaus-qw1.5b-unamgs-grpo (#4758) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 24b4d65f..76298cbb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3491,6 +3491,21 @@ - filename: TinySwallow-1.5B-Instruct-Q4_K_M.gguf sha256: 4d409c8873c1650a19c0a7a1c051e342613191a487768fe0d29735b9361079cd uri: huggingface://bartowski/TinySwallow-1.5B-Instruct-GGUF/TinySwallow-1.5B-Instruct-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "fblgit_miniclaus-qw1.5b-unamgs-grpo" + icon: https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS/resolve/main/miniclaus_qw15-UNAMGS.png + urls: + - https://huggingface.co/fblgit/miniclaus-qw1.5B-UNAMGS-GRPO + - https://huggingface.co/bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF + description: | + This version is RL with GRPO on GSM8k for 1400 steps + overrides: + parameters: + model: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf + files: + - filename: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf + sha256: 88ceacc5900062bc2afc352f009233225b0fe10203cbb61b122e8f10244449c8 + uri: huggingface://bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 From bfa3d4ccff3a0d057d3c6c89f883c035d7398745 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Feb 2025 09:50:18 +0100 Subject: [PATCH 61/85] chore(model gallery): add nohobby_l3.3-prikol-70b-v0.4 (#4759) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 76298cbb..50ea9b27 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -489,6 +489,25 @@ - filename: L3.3-Nevoria-R1-70b-Q4_K_M.gguf sha256: 9f32f202fb5b1465c942693bb11eea9e8a1c5686b00602715b495c068eaf1c58 uri: huggingface://bartowski/L3.3-Nevoria-R1-70b-GGUF/L3.3-Nevoria-R1-70b-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "nohobby_l3.3-prikol-70b-v0.4" + icon: https://files.catbox.moe/x9t3zo.png + urls: + - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.4 + - https://huggingface.co/bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF + description: | + I have yet to try it UPD: it sucks, bleh + + Sometimes mistakes {{user}} for {{char}} and can't think. Other than that, the behavior is similar to the predecessors. + + It sometimes gives some funny replies tho, yay! 
+ overrides: + parameters: + model: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf + files: + - filename: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf + sha256: e1d67a40bdf0526bdfcaa16c6e4dfeecad41651e201b4009b65f4f444b773604 + uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF/Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 464686aee65e31924d282e71a037d857b8d0504e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Feb 2025 09:51:54 +0100 Subject: [PATCH 62/85] chore(model gallery): add suayptalha_maestro-10b (#4760) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 50ea9b27..aaee1d74 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -212,6 +212,21 @@ - filename: Virtuoso-Lite-Q4_K_M.gguf sha256: 1d21bef8467a11a1e473d397128b05fb87b7e824606cdaea061e550cb219fee2 uri: huggingface://bartowski/Virtuoso-Lite-GGUF/Virtuoso-Lite-Q4_K_M.gguf +- !!merge <<: *falcon3 + name: "suayptalha_maestro-10b" + icon: https://huggingface.co/suayptalha/Maestro-10B/resolve/main/Maestro-Logo.png + urls: + - https://huggingface.co/suayptalha/Maestro-10B + - https://huggingface.co/bartowski/suayptalha_Maestro-10B-GGUF + description: | + Maestro-10B is a 10 billion parameter model fine-tuned from Virtuoso-Lite, a next-generation language model developed by arcee-ai. Virtuoso-Lite itself is based on the Llama-3 architecture, distilled from Deepseek-v3 using approximately 1.1 billion tokens/logits. This distillation process allows Virtuoso-Lite to achieve robust performance with a smaller parameter count, excelling in reasoning, code generation, and mathematical problem-solving. Maestro-10B inherits these strengths from its base model, Virtuoso-Lite, and further enhances them through fine-tuning on the OpenOrca dataset. This combination of a distilled base model and targeted fine-tuning makes Maestro-10B a powerful and efficient language model. 
+ overrides: + parameters: + model: suayptalha_Maestro-10B-Q4_K_M.gguf + files: + - filename: suayptalha_Maestro-10B-Q4_K_M.gguf + sha256: c570381da5624782ce6df4186ace6f747429fcbaf1a22c2a348288d3552eb19c + uri: huggingface://bartowski/suayptalha_Maestro-10B-GGUF/suayptalha_Maestro-10B-Q4_K_M.gguf - &intellect1 name: "intellect-1-instruct" url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" From 7329db4e7896542a876c7a5b8500060d366af790 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 4 Feb 2025 22:48:49 +0100 Subject: [PATCH 63/85] chore: :arrow_up: Update ggerganov/llama.cpp to `3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904` (#4762) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 576a480b..e541b503 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=5598f475be3e31430fbe17ebb85654ec90dc201e +CPPLLAMA_VERSION?=3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 3324c4e6cbbf1cbb772c061e325480fbf03b9805 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 5 Feb 2025 10:09:33 +0100 Subject: [PATCH 64/85] chore(model gallery): add agi-0_art-skynet-3b (#4763) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index aaee1d74..bae29241 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1432,6 +1432,20 @@ - filename: LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf sha256: 547e57c2d3f17c632c9fd303afdb00446e7396df453aee62633b76976c407616 uri: huggingface://mradermacher/LocalAI-functioncall-llama3.2-1b-v0.4-GGUF/LocalAI-functioncall-llama3.2-1b-v0.4.Q8_0.gguf +- !!merge <<: *llama32 + name: "agi-0_art-skynet-3b" + urls: + - https://huggingface.co/AGI-0/Art-Skynet-3B + - https://huggingface.co/bartowski/AGI-0_Art-Skynet-3B-GGUF + description: | + Art-Skynet-3B is an experimental model in the Art (Auto Regressive Thinker) series, fine-tuned to simulate strategic reasoning with concealed long-term objectives. Built on meta-llama/Llama-3.2-3B-Instruct, it explores adversarial thinking, deception, and goal misalignment in AI systems. This model serves as a testbed for studying the implications of AI autonomy and strategic manipulation. 
+ overrides:
+ parameters:
+ model: AGI-0_Art-Skynet-3B-Q4_K_M.gguf
+ files:
+ - filename: AGI-0_Art-Skynet-3B-Q4_K_M.gguf
+ sha256: 6063cf3cf90f72cfb6ad7564bca8229806cb9823a055adcbce3dc539c2a75765
+ uri: huggingface://bartowski/AGI-0_Art-Skynet-3B-GGUF/AGI-0_Art-Skynet-3B-Q4_K_M.gguf
- &qwen25
 name: "qwen2.5-14b-instruct" ## Qwen2.5
 icon: https://avatars.githubusercontent.com/u/141221163

From 0bc3dc43dad0a7c9a0e795e09dcd48c65a5efa8c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 5 Feb 2025 10:13:21 +0100
Subject: [PATCH 65/85] chore(model gallery): add rubenroy_gilgamesh-72b (#4764)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index bae29241..4a2a0c2e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -3554,6 +3554,27 @@
 - filename: fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
 sha256: 88ceacc5900062bc2afc352f009233225b0fe10203cbb61b122e8f10244449c8
 uri: huggingface://bartowski/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-GGUF/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO-Q4_K_M.gguf
+- !!merge <<: *qwen25
+ name: "rubenroy_gilgamesh-72b"
+ icon: https://cdn.ruben-roy.com/AI/Gilgamesh/img/art.png
+ urls:
+ - https://huggingface.co/rubenroy/Gilgamesh-72B
+ - https://huggingface.co/bartowski/rubenroy_Gilgamesh-72B-GGUF
+ description: |
+ Gilgamesh 72B was trained on a mixture of specialised datasets designed for factual accuracy, mathematical capabilities and reasoning. The datasets used include:
+
+ GammaCorpus-v2-5m: A large 5 million line general-purpose dataset covering many topics to enhance broad knowledge and conversational abilities.
+ GammaCorpus-CoT-Math-170k: A dataset focused on Chain-of-Thought (CoT) reasoning in mathematics made to help the model improve step-by-step problem-solving.
+ GammaCorpus-Fact-QA-450k: A dataset containing factual question-answer pairs for enforcing some important current knowledge.
+
+ These datasets were all built and curated by me; however, I thank my other team members at Ovantage Labs for assisting me in the creation and curation of these datasets.
+ overrides: + parameters: + model: rubenroy_Gilgamesh-72B-Q4_K_M.gguf + files: + - filename: rubenroy_Gilgamesh-72B-Q4_K_M.gguf + sha256: c6842b3bc882082c63243e762234ae697c1727bebed18b5241eb97e019f0cf68 + uri: huggingface://bartowski/rubenroy_Gilgamesh-72B-GGUF/rubenroy_Gilgamesh-72B-Q4_K_M.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 From 1996ceb293c558f996312acdcc5622820ba8633e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 5 Feb 2025 10:17:05 +0100 Subject: [PATCH 66/85] chore(model gallery): add krutrim-ai-labs_krutrim-2-instruct (#4765) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4a2a0c2e..085881bb 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6815,6 +6815,21 @@ - filename: Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf sha256: d1a6d049f09730c3f8ba26cf6b0b60c89790b5fdafa9a59c819acdfe93fffd1b uri: huggingface://bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "krutrim-ai-labs_krutrim-2-instruct" + icon: https://avatars.githubusercontent.com/u/168750421?s=200&v=4 + urls: + - https://huggingface.co/krutrim-ai-labs/Krutrim-2-instruct + - https://huggingface.co/bartowski/krutrim-ai-labs_Krutrim-2-instruct-GGUF + description: | + Krutrim-2 is a 12B parameter language model developed by the OLA Krutrim team. It is built on the Mistral-NeMo 12B architecture and trained across various domains, including web data, code, math, Indic languages, Indian context data, synthetic data, and books. Following pretraining, the model was finetuned for instruction following on diverse data covering a wide range of tasks, including knowledge recall, math, reasoning, coding, safety, and creative writing. 
+ overrides:
+ parameters:
+ model: krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
+ files:
+ - filename: krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
+ sha256: 03aa6d1fb7ab70482a2242839b8d8e1c789aa90a8be415076ddf84bef65f06c7
+ uri: huggingface://bartowski/krutrim-ai-labs_Krutrim-2-instruct-GGUF/krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf
- &mudler
 url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models
 name: "LocalAI-llama3-8b-function-call-v0.2"

From 7bc80c17f8f28bf2fb2986c5edf2c421aacd559d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 5 Feb 2025 10:19:31 +0100
Subject: [PATCH 67/85] chore(model gallery): add LocalAI-functioncall-llama3.2-3b-v0.5 (#4766)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 085881bb..d55adda9 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1418,6 +1418,7 @@
 sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
 uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
 - !!merge <<: *llama32
+ icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png
 name: "LocalAI-functioncall-llama3.2-1b-v0.4"
 url: "github:mudler/LocalAI/gallery/llama3.2-fcall.yaml@master"
 urls:
@@ -1446,6 +1447,21 @@
 - filename: AGI-0_Art-Skynet-3B-Q4_K_M.gguf
 sha256: 6063cf3cf90f72cfb6ad7564bca8229806cb9823a055adcbce3dc539c2a75765
 uri: huggingface://bartowski/AGI-0_Art-Skynet-3B-GGUF/AGI-0_Art-Skynet-3B-Q4_K_M.gguf
+- !!merge <<: *llama32
+ name: "localai-functioncall-llama3.2-3b-v0.5"
+ icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png
+ urls:
+ - https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5
+ - https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5-Q4_K_M-GGUF
+ description: |
+ A model tailored to be conversational and execute function calls with LocalAI. This model is based on llama3.2 (3B).
+ overrides:
+ parameters:
+ model: localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
+ files:
+ - filename: localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
+ sha256: edc50f6c243e6bd6912599661a15e030de03d2be53409663ac27d3ca48306ee4
+ uri: huggingface://mudler/LocalAI-functioncall-llama3.2-3b-v0.5-Q4_K_M-GGUF/localai-functioncall-llama3.2-3b-v0.5-q4_k_m.gguf
 - &qwen25
 name: "qwen2.5-14b-instruct" ## Qwen2.5
 icon: https://avatars.githubusercontent.com/u/141221163

From e4b8ddb6a1c3f0d14dbdde217b24896951e03da3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 5 Feb 2025 18:37:09 +0100
Subject: [PATCH 68/85] fix(gallery): do not return overrides and additional config (#4768)

When hitting /models/available we are interested in the model description, name and other small metadata. Configuration and overrides are part of internals which are required only for installation.
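In practice, the listing endpoint now serializes only the embedded Metadata
of each gallery model. A condensed sketch of the idea (struct fields are
abridged from the diff below and the sample values are illustrative, not the
project's actual data):

package main

import (
	"encoding/json"
	"fmt"
)

// Metadata carries only what a listing needs (abridged).
type Metadata struct {
	URL  string `json:"url,omitempty"`
	Name string `json:"name,omitempty"`
}

// GalleryModel embeds Metadata and keeps installation internals separate.
type GalleryModel struct {
	Metadata  `json:",inline" yaml:",inline"`
	Overrides map[string]interface{} `json:"overrides,omitempty"`
}

func main() {
	models := []GalleryModel{{
		Metadata:  Metadata{Name: "bert", URL: "github:example/gallery/bert.yaml"},
		Overrides: map[string]interface{}{"foo": "bar"},
	}}

	// /models/available returns just the metadata, never the overrides:
	meta := []Metadata{}
	for _, m := range models {
		meta = append(meta, m.Metadata)
	}
	out, _ := json.Marshal(meta)
	fmt.Println(string(out)) // [{"url":"github:example/gallery/bert.yaml","name":"bert"}]
}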
This also solves a current bug where hitting /models/available fails if one of the gallery items has overrides with parameters defined.

Signed-off-by: Ettore Di Giacinto
---
 core/gallery/models_test.go | 6 ++++--
 core/gallery/request.go | 12 ++++++++----
 core/gallery/request_test.go | 6 +++++-
 core/http/app_test.go | 16 ++++++++++------
 core/http/endpoints/localai/gallery.go | 18 ++++++++++++------
 5 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/core/gallery/models_test.go b/core/gallery/models_test.go
index 6229c983..ef4faed8 100644
--- a/core/gallery/models_test.go
+++ b/core/gallery/models_test.go
@@ -48,8 +48,10 @@ var _ = Describe("Model test", func() {
 defer os.RemoveAll(tempdir)

 gallery := []GalleryModel{{
- Name: "bert",
- URL: bertEmbeddingsURL,
+ Metadata: Metadata{
+ Name: "bert",
+ URL: bertEmbeddingsURL,
+ },
 }}
 out, err := yaml.Marshal(gallery)
 Expect(err).ToNot(HaveOccurred())
diff --git a/core/gallery/request.go b/core/gallery/request.go
index eec764c1..72d078a1 100644
--- a/core/gallery/request.go
+++ b/core/gallery/request.go
@@ -11,6 +11,14 @@ import (
 // It is used to install the model by resolving the URL and downloading the files.
 // The other fields are used to override the configuration of the model.
 type GalleryModel struct {
+ Metadata `json:",inline" yaml:",inline"`
+ // config_file is read in the situation where URL is blank - and therefore this is a base config.
+ ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"`
+ // Overrides are used to override the configuration of the model located at URL
+ Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"`
+}
+
+type Metadata struct {
 URL string `json:"url,omitempty" yaml:"url,omitempty"`
 Name string `json:"name,omitempty" yaml:"name,omitempty"`
 Description string `json:"description,omitempty" yaml:"description,omitempty"`
@@ -18,10 +26,6 @@ type GalleryModel struct {
 URLs []string `json:"urls,omitempty" yaml:"urls,omitempty"`
 Icon string `json:"icon,omitempty" yaml:"icon,omitempty"`
 Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
 // config_file is read in the situation where URL is blank - and therefore this is a base config.
- ConfigFile map[string]interface{} `json:"config_file,omitempty" yaml:"config_file,omitempty"` - // Overrides are used to override the configuration of the model located at URL - Overrides map[string]interface{} `json:"overrides,omitempty" yaml:"overrides,omitempty"` // AdditionalFiles are used to add additional files to the model AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` // Gallery is a reference to the gallery which contains the model diff --git a/core/gallery/request_test.go b/core/gallery/request_test.go index 23281cc6..ed07f474 100644 --- a/core/gallery/request_test.go +++ b/core/gallery/request_test.go @@ -9,7 +9,11 @@ import ( var _ = Describe("Gallery API tests", func() { Context("requests", func() { It("parses github with a branch", func() { - req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"} + req := GalleryModel{ + Metadata: Metadata{ + URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main", + }, + } e, err := GetGalleryConfigFromURL(req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) diff --git a/core/http/app_test.go b/core/http/app_test.go index bc4ecfae..ca7a2eaa 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -299,14 +299,18 @@ var _ = Describe("API test", func() { g := []gallery.GalleryModel{ { - Name: "bert", - URL: bertEmbeddingsURL, + Metadata: gallery.Metadata{ + Name: "bert", + URL: bertEmbeddingsURL, + }, }, { - Name: "bert2", - URL: bertEmbeddingsURL, - Overrides: map[string]interface{}{"foo": "bar"}, - AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}}, + Metadata: gallery.Metadata{ + Name: "bert2", + URL: bertEmbeddingsURL, + AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}}, + }, + Overrides: map[string]interface{}{"foo": "bar"}, }, } out, err := yaml.Marshal(g) diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 5b2968f4..9dc99f5d 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -117,19 +117,25 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib // @Router /models/available [get] func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { - log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) models, err := gallery.AvailableGalleryModels(mgs.galleries, mgs.modelPath) if err != nil { return err } - log.Debug().Msgf("Models found from galleries: %+v", models) - for _, m := range models { - log.Debug().Msgf("Model found from galleries: %+v", m) + + log.Debug().Msgf("Available %d models from %d galleries\n", len(models), len(mgs.galleries)) + + m := []gallery.Metadata{} + + for _, mm := range models { + m = append(m, mm.Metadata) } - dat, err := json.Marshal(models) + + log.Debug().Msgf("Models %#v", m) + + dat, err := json.Marshal(m) if err != nil { - return err + return fmt.Errorf("could not marshal models: %w", err) } return c.Send(dat) } From 3ecaea1b6e114245cbfd5720f3936652eb63cc77 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 5 Feb 2025 19:41:49 +0100 Subject: [PATCH 69/85] chore(docs): update sponsors in the website Signed-off-by: Ettore Di Giacinto --- docs/content/docs/overview.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 5bcb6178..2176f5c2 100644 --- 
a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -120,6 +120,23 @@ To help the project you can: [![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date) +## ❤️ Sponsors + +> Do you find LocalAI useful? + +Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website. + +A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler): + +

+ + + + +
+
+

+ ## 📖 License LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/). From 2a702e9ca4b72969de9580b7fedd13d546c50b2c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 5 Feb 2025 19:49:11 +0100 Subject: [PATCH 70/85] chore(docs): small updates Signed-off-by: Ettore Di Giacinto --- docs/content/docs/overview.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 2176f5c2..d666db85 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -40,6 +40,10 @@ icon = "info"

+

+mudler%2FLocalAI | Trendshift +

+

Follow LocalAI_API @@ -118,7 +122,7 @@ To help the project you can: ## 🌟 Star history -[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date) +[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=mudler/LocalAI&type=Date)](https://star-history.com/#mudler/LocalAI&Date) ## ❤️ Sponsors From 28a1310890595d270e1ce2598e4c1c8e79fc0d29 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 5 Feb 2025 19:50:32 +0100 Subject: [PATCH 71/85] chore(docs): enhance visibility Signed-off-by: Ettore Di Giacinto --- docs/content/docs/overview.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index d666db85..9e72f119 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -134,10 +134,10 @@ A huge thank you to our generous sponsors who support this project covering CI e

- + -
+

From 81be192279e016c2a35dbf130a67cc9e8ccdbc60 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 6 Feb 2025 00:49:15 +0100 Subject: [PATCH 72/85] chore: :arrow_up: Update leejet/stable-diffusion.cpp to `d46ed5e184b97c2018dc2e8105925bdb8775e02c` (#4769) :arrow_up: Update leejet/stable-diffusion.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e541b503..663a95de 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ BARKCPP_VERSION?=v1.0.0 # stablediffusion.cpp (ggml) STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp -STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024 +STABLEDIFFUSION_GGML_VERSION?=d46ed5e184b97c2018dc2e8105925bdb8775e02c ONNX_VERSION?=1.20.0 ONNX_ARCH?=x64 From d35595372d1b3f585175e638814c30bc6a20dd89 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 6 Feb 2025 09:02:51 +0100 Subject: [PATCH 73/85] chore: :arrow_up: Update ggerganov/llama.cpp to `d774ab3acc4fee41fbed6dbfc192b57d5f79f34b` (#4770) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 663a95de..7edb6f6a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3ec9fd4b77b6aca03a3c2bf678eae3f9517d6904 +CPPLLAMA_VERSION?=d774ab3acc4fee41fbed6dbfc192b57d5f79f34b # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From 16ced071025888708a59ee40e740cedf24aff039 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 11:59:14 +0100 Subject: [PATCH 74/85] chore(model gallery): add arliai_llama-3.3-70b-arliai-rpmax-v1.4 (#4772) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index d55adda9..b57d337f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -523,6 +523,20 @@ - filename: Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf sha256: e1d67a40bdf0526bdfcaa16c6e4dfeecad41651e201b4009b65f4f444b773604 uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.4-GGUF/Nohobby_L3.3-Prikol-70B-v0.4-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "arliai_llama-3.3-70b-arliai-rpmax-v1.4" + urls: + - https://huggingface.co/ArliAI/Llama-3.3-70B-ArliAI-RPMax-v1.4 + - https://huggingface.co/bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF + description: | + RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations. 
+ overrides: + parameters: + model: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf + files: + - filename: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf + sha256: 7c79e76e5c057cfe32529d930360fbebd29697948e5bac4e4b2eb6d2ee596e31 + uri: huggingface://bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" @@ -1448,7 +1462,7 @@ sha256: 6063cf3cf90f72cfb6ad7564bca8229806cb9823a055adcbce3dc539c2a75765 uri: huggingface://bartowski/AGI-0_Art-Skynet-3B-GGUF/AGI-0_Art-Skynet-3B-Q4_K_M.gguf - !!merge <<: *llama32 - name: "localai-functioncall-llama3.2-3b-v0.5" + name: "LocalAI-functioncall-llama3.2-3b-v0.5" icon: https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/Dzbdzn27KEc3K6zNNi070.png urls: - https://huggingface.co/mudler/LocalAI-functioncall-llama3.2-3b-v0.5 From a801561f819bc79bc6e6c232b55c42586a406e42 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:01:56 +0100 Subject: [PATCH 75/85] chore(model gallery): add tiger-lab_qwen2.5-32b-instruct-cft (#4773) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b57d337f..98760238 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3605,6 +3605,20 @@ - filename: rubenroy_Gilgamesh-72B-Q4_K_M.gguf sha256: c6842b3bc882082c63243e762234ae697c1727bebed18b5241eb97e019f0cf68 uri: huggingface://bartowski/rubenroy_Gilgamesh-72B-GGUF/rubenroy_Gilgamesh-72B-Q4_K_M.gguf +- !!merge <<: *qwen25 + name: "tiger-lab_qwen2.5-32b-instruct-cft" + urls: + - https://huggingface.co/TIGER-Lab/Qwen2.5-32B-Instruct-CFT + - https://huggingface.co/bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF + description: | + Qwen2.5-32B-Instruct-CFT is a 32B parameter model fine-tuned using our novel Critique Fine-Tuning (CFT) approach. Built upon the Qwen2.5-32B-Instruct base model, this variant is trained to critique and analyze responses rather than simply imitate them, leading to enhanced reasoning capabilities. 
+ overrides: + parameters: + model: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf + files: + - filename: TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf + sha256: 57e87e246db368f39f31f38e44ba8e9dc838a026f729f5a123aacc2aeb5a9402 + uri: huggingface://bartowski/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-GGUF/TIGER-Lab_Qwen2.5-32B-Instruct-CFT-Q4_K_M.gguf - &llama31 url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master" ## LLama3.1 icon: https://avatars.githubusercontent.com/u/153379578 From e4b8ddb6a1c3f0d14dbdde217b24896951e03da3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:03:59 +0100 Subject: [PATCH 76/85] chore(model gallery): add black-ink-guild_pernicious_prophecy_70b (#4774) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 98760238..4e75e71f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -537,6 +537,22 @@ - filename: ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf sha256: 7c79e76e5c057cfe32529d930360fbebd29697948e5bac4e4b2eb6d2ee596e31 uri: huggingface://bartowski/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-GGUF/ArliAI_Llama-3.3-70B-ArliAI-RPMax-v1.4-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "black-ink-guild_pernicious_prophecy_70b" + icon: https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B/resolve/main/header.gif + urls: + - https://huggingface.co/Black-Ink-Guild/Pernicious_Prophecy_70B + - https://huggingface.co/bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF + description: | + Pernicious Prophecy 70B is a Llama-3.3 70B-based, two-step model designed by Black Ink Guild (SicariusSicariiStuff and invisietch) for uncensored roleplay, assistant tasks, and general usage. + NOTE: Pernicious Prophecy 70B is an uncensored model and can produce deranged, offensive, and dangerous outputs. You are solely responsible for anything that you choose to do with this model. + overrides: + parameters: + model: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf + files: + - filename: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf + sha256: d8d4874b837993546b750db3faf1c6e5d867883a6750f04f1f4986973d7c107b + uri: huggingface://bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF/Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From 8d45670e4109db8968ffa5ae426f6656e9e0784c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 12:41:08 +0100 Subject: [PATCH 77/85] fix(openai): consistently return stop reason (#4771) We were not returning a stop reason when no tool was actually called (even if specified). 
Fixes: https://github.com/mudler/LocalAI/issues/4716 Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 3b8d3056..a94a729a 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -401,6 +401,11 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 + finishReason := "stop" + if len(input.Tools) > 0 { + finishReason = "tool_calls" + } + switch { case noActionsToRun: result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput) @@ -408,19 +413,18 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat log.Error().Err(err).Msg("error handling question") return } + *c = append(*c, schema.Choice{ - Message: &schema.Message{Role: "assistant", Content: &result}}) + FinishReason: finishReason, + Message: &schema.Message{Role: "assistant", Content: &result}}) default: toolChoice := schema.Choice{ + FinishReason: finishReason, Message: &schema.Message{ Role: "assistant", }, } - if len(input.Tools) > 0 { - toolChoice.FinishReason = "tool_calls" - } - for _, ss := range results { name, args := ss.Name, ss.Arguments if len(input.Tools) > 0 { @@ -438,7 +442,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat }, ) } else { - // otherwise we return more choices directly + // otherwise we return more choices directly (deprecated) *c = append(*c, schema.Choice{ FinishReason: "function_call", Message: &schema.Message{ From 7f90ff7aecd973a17c77a7248b9112401eac4c97 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 18:36:23 +0100 Subject: [PATCH 78/85] chore(llama-ggml): drop deprecated backend (#4775) The GGML format is now dead, since in the next version of LocalAI we already bring many breaking compatibility changes, taking the occasion also to drop ggml support (pre-gguf). Signed-off-by: Ettore Di Giacinto --- Makefile | 38 +--- backend/go/llm/llama-ggml/llama.go | 204 ------------------ backend/go/llm/llama-ggml/main.go | 19 -- core/http/app_test.go | 71 ------ docs/content/docs/features/text-generation.md | 17 +- pkg/model/initializers.go | 6 +- 6 files changed, 7 insertions(+), 348 deletions(-) delete mode 100644 backend/go/llm/llama-ggml/llama.go delete mode 100644 backend/go/llm/llama-ggml/main.go diff --git a/Makefile b/Makefile index 7edb6f6a..790c6e6d 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,6 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp -GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=d774ab3acc4fee41fbed6dbfc192b57d5f79f34b # whisper.cpp version @@ -151,7 +149,6 @@ ifeq ($(BUILD_TYPE),hipblas) LD_LIBRARY_PATH ?= /opt/rocm/lib:/opt/rocm/llvm/lib export CXX=$(ROCM_HOME)/llvm/bin/clang++ export CC=$(ROCM_HOME)/llvm/bin/clang - # llama-ggml has no hipblas support, so override it here. 
export STABLE_BUILD_TYPE= export GGML_HIP=1 GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 @@ -188,7 +185,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx512 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback -ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper @@ -222,19 +218,6 @@ endif all: help -## go-llama.cpp -sources/go-llama.cpp: - mkdir -p sources/go-llama.cpp - cd sources/go-llama.cpp && \ - git init && \ - git remote add origin $(GOLLAMA_REPO) && \ - git fetch origin && \ - git checkout $(GOLLAMA_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp - $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a - ## bark.cpp sources/bark.cpp: git clone --recursive $(BARKCPP_REPO) sources/bark.cpp && \ @@ -310,19 +293,17 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp +get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp replace: $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp $(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper - $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp $(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper - $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp prepare-sources: get-sources replace $(GOCMD) mod download @@ -330,7 +311,6 @@ prepare-sources: get-sources replace ## GENERIC rebuild: ## Rebuilds the project $(GOCMD) clean -cache - $(MAKE) -C sources/go-llama.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-piper clean $(MAKE) build @@ -434,7 +414,7 @@ run: prepare ## run local-ai test-models/testmodel.ggml: mkdir test-models mkdir test-dir - wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel.ggml + wget -q https://huggingface.co/RichardErkhov/Qwen_-_Qwen2-1.5B-Instruct-gguf/resolve/main/Qwen2-1.5B-Instruct.Q2_K.gguf -O test-models/testmodel.ggml wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav @@ -449,8 +429,7 @@ test: prepare test-models/testmodel.ggml grpcs export GO_TAGS="tts debug" $(MAKE) prepare-test HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath 
./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) - $(MAKE) test-llama + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-llama-gguf $(MAKE) test-tts $(MAKE) test-stablediffusion @@ -479,10 +458,6 @@ teardown-e2e: rm -rf $(TEST_DIR) || true docker stop $$(docker ps -q --filter ancestor=localai-tests) -test-llama: prepare-test - TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) - test-llama-gguf: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama-gguf" --flake-attempts $(TEST_FLAKES) -v -r $(TEST_PATHS) @@ -760,13 +735,6 @@ backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc mkdir -p backend-assets/util/ cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server -backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/llama-ggml -endif - backend-assets/grpc/bark-cpp: backend/go/bark/libbark.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark/ LIBRARY_PATH=$(CURDIR)/backend/go/bark/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark/ diff --git a/backend/go/llm/llama-ggml/llama.go b/backend/go/llm/llama-ggml/llama.go deleted file mode 100644 index 1a7add69..00000000 --- a/backend/go/llm/llama-ggml/llama.go +++ /dev/null @@ -1,204 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/go-skynet/go-llama.cpp" - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" -) - -type LLM struct { - base.SingleThread - - llama *llama.LLama -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - ropeFreqBase := float32(10000) - ropeFreqScale := float32(1) - - if opts.RopeFreqBase != 0 { - ropeFreqBase = opts.RopeFreqBase - } - if opts.RopeFreqScale != 0 { - ropeFreqScale = opts.RopeFreqScale - } - - llamaOpts := []llama.ModelOption{ - llama.WithRopeFreqBase(ropeFreqBase), - llama.WithRopeFreqScale(ropeFreqScale), - } - - if opts.NGQA != 0 { - llamaOpts = append(llamaOpts, llama.WithGQA(int(opts.NGQA))) - } - - if opts.RMSNormEps != 0 { - llamaOpts = append(llamaOpts, llama.WithRMSNormEPS(opts.RMSNormEps)) - } - - if opts.ContextSize != 0 { - llamaOpts = append(llamaOpts, llama.SetContext(int(opts.ContextSize))) - } - if opts.F16Memory { - llamaOpts = append(llamaOpts, llama.EnableF16Memory) - } - if 
opts.Embeddings { - llamaOpts = append(llamaOpts, llama.EnableEmbeddings) - } - if opts.NGPULayers != 0 { - llamaOpts = append(llamaOpts, llama.SetGPULayers(int(opts.NGPULayers))) - } - - llamaOpts = append(llamaOpts, llama.SetMMap(opts.MMap)) - llamaOpts = append(llamaOpts, llama.SetMainGPU(opts.MainGPU)) - llamaOpts = append(llamaOpts, llama.SetTensorSplit(opts.TensorSplit)) - if opts.NBatch != 0 { - llamaOpts = append(llamaOpts, llama.SetNBatch(int(opts.NBatch))) - } else { - llamaOpts = append(llamaOpts, llama.SetNBatch(512)) - } - - if opts.NUMA { - llamaOpts = append(llamaOpts, llama.EnableNUMA) - } - - if opts.LowVRAM { - llamaOpts = append(llamaOpts, llama.EnabelLowVRAM) - } - - model, err := llama.New(opts.ModelFile, llamaOpts...) - llm.llama = model - - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption { - ropeFreqBase := float32(10000) - ropeFreqScale := float32(1) - - if opts.RopeFreqBase != 0 { - ropeFreqBase = opts.RopeFreqBase - } - if opts.RopeFreqScale != 0 { - ropeFreqScale = opts.RopeFreqScale - } - predictOptions := []llama.PredictOption{ - llama.SetTemperature(opts.Temperature), - llama.SetTopP(opts.TopP), - llama.SetTopK(int(opts.TopK)), - llama.SetTokens(int(opts.Tokens)), - llama.SetThreads(int(opts.Threads)), - llama.WithGrammar(opts.Grammar), - llama.SetRopeFreqBase(ropeFreqBase), - llama.SetRopeFreqScale(ropeFreqScale), - llama.SetNegativePromptScale(opts.NegativePromptScale), - llama.SetNegativePrompt(opts.NegativePrompt), - } - - if opts.PromptCacheAll { - predictOptions = append(predictOptions, llama.EnablePromptCacheAll) - } - - if opts.PromptCacheRO { - predictOptions = append(predictOptions, llama.EnablePromptCacheRO) - } - - // Expected absolute path - if opts.PromptCachePath != "" { - predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath)) - } - - if opts.Mirostat != 0 { - predictOptions = append(predictOptions, llama.SetMirostat(int(opts.Mirostat))) - } - - if opts.MirostatETA != 0 { - predictOptions = append(predictOptions, llama.SetMirostatETA(opts.MirostatETA)) - } - - if opts.MirostatTAU != 0 { - predictOptions = append(predictOptions, llama.SetMirostatTAU(opts.MirostatTAU)) - } - - if opts.Debug { - predictOptions = append(predictOptions, llama.Debug) - } - - predictOptions = append(predictOptions, llama.SetStopWords(opts.StopPrompts...)) - - if opts.PresencePenalty != 0 { - predictOptions = append(predictOptions, llama.SetPenalty(opts.PresencePenalty)) - } - - if opts.NKeep != 0 { - predictOptions = append(predictOptions, llama.SetNKeep(int(opts.NKeep))) - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, llama.SetBatch(int(opts.Batch))) - } - - if opts.F16KV { - predictOptions = append(predictOptions, llama.EnableF16KV) - } - - if opts.IgnoreEOS { - predictOptions = append(predictOptions, llama.IgnoreEOS) - } - - if opts.Seed != 0 { - predictOptions = append(predictOptions, llama.SetSeed(int(opts.Seed))) - } - - //predictOptions = append(predictOptions, llama.SetLogitBias(c.Seed)) - - predictOptions = append(predictOptions, llama.SetFrequencyPenalty(opts.FrequencyPenalty)) - predictOptions = append(predictOptions, llama.SetMlock(opts.MLock)) - predictOptions = append(predictOptions, llama.SetMemoryMap(opts.MMap)) - predictOptions = append(predictOptions, llama.SetPredictionMainGPU(opts.MainGPU)) - predictOptions = append(predictOptions, llama.SetPredictionTensorSplit(opts.TensorSplit)) - predictOptions = append(predictOptions, 
llama.SetTailFreeSamplingZ(opts.TailFreeSamplingZ)) - predictOptions = append(predictOptions, llama.SetTypicalP(opts.TypicalP)) - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - predictOptions := buildPredictOptions(opts) - - predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool { - results <- token - return true - })) - - go func() { - _, err := llm.llama.Predict(opts.Prompt, predictOptions...) - if err != nil { - fmt.Println("err: ", err) - } - close(results) - }() - - return nil -} - -func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) { - predictOptions := buildPredictOptions(opts) - - if len(opts.EmbeddingTokens) > 0 { - tokens := []int{} - for _, t := range opts.EmbeddingTokens { - tokens = append(tokens, int(t)) - } - return llm.llama.TokenEmbeddings(tokens, predictOptions...) - } - - return llm.llama.Embeddings(opts.Embeddings, predictOptions...) -} diff --git a/backend/go/llm/llama-ggml/main.go b/backend/go/llm/llama-ggml/main.go deleted file mode 100644 index 544771db..00000000 --- a/backend/go/llm/llama-ggml/main.go +++ /dev/null @@ -1,19 +0,0 @@ -package main - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/core/http/app_test.go b/core/http/app_test.go index ca7a2eaa..ecaf6da3 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -526,77 +526,6 @@ var _ = Describe("API test", func() { Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this")) }) - It("runs openllama(llama-ggml backend)", Label("llama"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/openllama_3b.yaml", - Name: "openllama_3b", - Overrides: map[string]interface{}{"backend": "llama-ggml", "mmap": true, "f16": true, "context_size": 128}, - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "360s", "10s").Should(Equal(true)) - - By("testing completion") - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Text).To(ContainSubstring("five")) - - By("testing functions") - resp2, err := client.CreateChatCompletion( - context.TODO(), - openai.ChatCompletionRequest{ - Model: "openllama_3b", - Messages: []openai.ChatCompletionMessage{ - { - Role: "user", - Content: "What is the weather like in San Francisco (celsius)?", - }, - }, - Functions: []openai.FunctionDefinition{ - openai.FunctionDefinition{ - Name: "get_current_weather", - Description: "Get the current weather", - Parameters: jsonschema.Definition{ - Type: jsonschema.Object, - Properties: map[string]jsonschema.Definition{ - "location": { - Type: jsonschema.String, - 
Description: "The city and state, e.g. San Francisco, CA", - }, - "unit": { - Type: jsonschema.String, - Enum: []string{"celcius", "fahrenheit"}, - }, - }, - Required: []string{"location"}, - }, - }, - }, - }) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp2.Choices)).To(Equal(1)) - Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil()) - Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name) - - var res map[string]string - err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res) - Expect(err).ToNot(HaveOccurred()) - Expect(res["location"]).To(ContainSubstring("San Francisco"), fmt.Sprint(res)) - Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) - Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) - - }) - It("runs openllama gguf(llama-cpp)", Label("llama-gguf"), func() { if runtime.GOOS != "linux" { Skip("test supported only on linux") diff --git a/docs/content/docs/features/text-generation.md b/docs/content/docs/features/text-generation.md index 11ab3999..342b8e76 100644 --- a/docs/content/docs/features/text-generation.md +++ b/docs/content/docs/features/text-generation.md @@ -124,7 +124,7 @@ Note: rwkv models needs to specify the backend `rwkv` in the YAML config files a {{% alert note %}} -The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use the `llama-ggml` backend instead. If you are relying in automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`. The go backend supports still features not available in the mainline: speculative sampling and embeddings. +The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, specify, use a LocalAI version older than v2.25.0. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`. {{% /alert %}} @@ -175,25 +175,12 @@ name: llama backend: llama parameters: # Relative to the models path - model: file.gguf.bin -``` - -In the example above we specify `llama` as the backend to restrict loading `gguf` models only. - -For instance, to use the `llama-ggml` backend for `ggml` models: - -```yaml -name: llama -backend: llama-ggml -parameters: - # Relative to the models path - model: file.ggml.bin + model: file.gguf ``` #### Reference - [llama](https://github.com/ggerganov/llama.cpp) -- [binding](https://github.com/go-skynet/go-llama.cpp) ### exllama/2 diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index ace72fa3..5e465cf0 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -43,8 +43,6 @@ var TypeAlias map[string]string = map[string]string{ var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true" const ( - LlamaGGML = "llama-ggml" - LLamaCPP = "llama-cpp" LLamaCPPAVX2 = "llama-cpp-avx2" @@ -143,10 +141,10 @@ func orderBackends(backends map[string][]string) ([]string, error) { // sets a priority list - first has more priority priorityList := []string{ - // First llama.cpp(variants) and llama-ggml to follow. + // First llama.cpp(variants) // We keep the fallback to prevent that if the llama.cpp variants // that depends on shared libs if breaks have still a safety net. 
- LLamaCPP, LlamaGGML, LLamaCPPFallback, + LLamaCPP, LLamaCPPFallback, } toTheEnd := []string{ From cc1f6f913f3c271cc2e73080991163b18ea03be0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 6 Feb 2025 19:39:59 +0100 Subject: [PATCH 79/85] fix(llama.cpp): disable mirostat as default (#2911) Even if increasing the quality of the output, it has shown to have performance drawbacks to be so noticeable that the confuses users about speed of LocalAI ( see also https://github.com/mudler/LocalAI/issues/2780 ). This changeset disables Mirostat by default (which can be still enabled manually). Signed-off-by: Ettore Di Giacinto Co-authored-by: Dave --- core/config/backend_config.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 8ce93d9f..2b130ec8 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -287,7 +287,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { defaultTopP := 0.95 defaultTopK := 40 defaultTemp := 0.9 - defaultMirostat := 2 + // https://github.com/mudler/LocalAI/issues/2780 + defaultMirostat := 0 defaultMirostatTAU := 5.0 defaultMirostatETA := 0.1 defaultTypicalP := 1.0 From 731674eee7457642a042a043398d40e6cbf3e06a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 6 Feb 2025 23:02:00 +0100 Subject: [PATCH 80/85] chore: :arrow_up: Update ggerganov/llama.cpp to `8a59053f63fffc24e730cd3ea067760abfe4a919` (#4776) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 790c6e6d..a1224035 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -CPPLLAMA_VERSION?=d774ab3acc4fee41fbed6dbfc192b57d5f79f34b +CPPLLAMA_VERSION?=8a59053f63fffc24e730cd3ea067760abfe4a919 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp From f670e0a91c788bde1c84d96958b3843d13f8f0f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 7 Feb 2025 13:29:53 +0100 Subject: [PATCH 81/85] chore(model gallery): add nohobby_l3.3-prikol-70b-v0.5 (#4777) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4e75e71f..5bde3e85 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -553,6 +553,29 @@ - filename: Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf sha256: d8d4874b837993546b750db3faf1c6e5d867883a6750f04f1f4986973d7c107b uri: huggingface://bartowski/Black-Ink-Guild_Pernicious_Prophecy_70B-GGUF/Black-Ink-Guild_Pernicious_Prophecy_70B-Q4_K_M.gguf +- !!merge <<: *llama33 + name: "nohobby_l3.3-prikol-70b-v0.5" + icon: https://files.catbox.moe/x9t3zo.png + urls: + - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.5 + - https://huggingface.co/bartowski/Nohobby_L3.3-Prikol-70B-v0.5-GGUF + description: | + 99% of mergekit addicts quit before they hit it big. + + Gosh, I need to create an org for my test runs - my profile looks like a dumpster. + + What was it again? Ah, the new model. + + Exactly what I wanted. All I had to do was yank out the cursed official DeepSeek distill and here we are. 
+ + From the brief tests it gave me some unusual takes on the character cards I'm used to. Just this makes it worth it imo. Also the writing is kinda nice. + overrides: + parameters: + model: Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf + files: + - filename: Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf + sha256: 36f29015f1f420f51569603445a3ea5fe72e3651c2022ef064086f5617578fe6 + uri: huggingface://bartowski/Nohobby_L3.3-Prikol-70B-v0.5-GGUF/Nohobby_L3.3-Prikol-70B-v0.5-Q4_K_M.gguf - &rwkv url: "github:mudler/LocalAI/gallery/rwkv.yaml@master" name: "rwkv-6-world-7b" From cc163429dc3ea027d9a6b6578757e942fcb62ce1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 7 Feb 2025 13:31:49 +0100 Subject: [PATCH 82/85] chore(model gallery): add cognitivecomputations_dolphin3.0-r1-mistral-24b (#4778) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5bde3e85..5af8f895 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6913,6 +6913,22 @@ - filename: krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf sha256: 03aa6d1fb7ab70482a2242839b8d8e1c789aa90a8be415076ddf84bef65f06c7 uri: huggingface://bartowski/krutrim-ai-labs_Krutrim-2-instruct-GGUF/krutrim-ai-labs_Krutrim-2-instruct-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "cognitivecomputations_dolphin3.0-r1-mistral-24b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/hdAvdwZiJaLbGmvSZ3wTT.png + urls: + - https://huggingface.co/cognitivecomputations/Dolphin3.0-R1-Mistral-24B + - https://huggingface.co/bartowski/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-GGUF + description: | + Dolphin 3.0 R1 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases. 
+ overrides: + parameters: + model: cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf + files: + - filename: cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf + sha256: d67de1e94fb32742bd09ee8beebbeb36a4b544785a8f8413dc4d9490e04eda6c + uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf - &mudler url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models name: "LocalAI-llama3-8b-function-call-v0.2" From 230fe0098faeca88a6ab4ddcba8e70ce0794ea86 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 7 Feb 2025 13:33:24 +0100 Subject: [PATCH 83/85] chore(model gallery): add cognitivecomputations_dolphin3.0-mistral-24b (#4779) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5af8f895..3e0c1ac6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6929,6 +6929,22 @@ - filename: cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf sha256: d67de1e94fb32742bd09ee8beebbeb36a4b544785a8f8413dc4d9490e04eda6c uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-R1-Mistral-24B-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "cognitivecomputations_dolphin3.0-mistral-24b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/cNCs1TBD3FelWCJGkZ3cd.png + urls: + - https://huggingface.co/cognitivecomputations/Dolphin3.0-Mistral-24B + - https://huggingface.co/bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF + description: | + Dolphin 3.0 is the next generation of the Dolphin series of instruct-tuned models. Designed to be the ultimate general purpose local model, enabling coding, math, agentic, function calling, and general use cases. 
+ overrides: + parameters: + model: cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf + files: + - filename: cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf + sha256: 6f193bbf98628140194df257c7466e2c6f80a7ef70a6ebae26c53b2f2ef21994 + uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf - &mudler url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models name: "LocalAI-llama3-8b-function-call-v0.2" From 4b1b942a7f747755fe3e45bead662eeb96db3959 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 8 Feb 2025 09:04:18 +0100 Subject: [PATCH 84/85] chore(model gallery): add sicariussicariistuff_redemption_wind_24b (#4781) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 3e0c1ac6..4b61a0e3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -6945,6 +6945,28 @@ - filename: cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf sha256: 6f193bbf98628140194df257c7466e2c6f80a7ef70a6ebae26c53b2f2ef21994 uri: huggingface://bartowski/cognitivecomputations_Dolphin3.0-Mistral-24B-GGUF/cognitivecomputations_Dolphin3.0-Mistral-24B-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "sicariussicariistuff_redemption_wind_24b" + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + icon: https://huggingface.co/SicariusSicariiStuff/Redemption_Wind_24B/resolve/main/Images/Redemption_Wind_24B.png + urls: + - https://huggingface.co/SicariusSicariiStuff/Redemption_Wind_24B + - https://huggingface.co/bartowski/SicariusSicariiStuff_Redemption_Wind_24B-GGUF + description: | + This is a lightly fine-tuned version of the Mistral 24B base model, designed as an accessible and adaptable foundation for further fine-tuning and merging fodder. Key modifications include: + ChatML-ified, with no additional tokens introduced. + High quality private instruct—not generated by ChatGPT or Claude, ensuring no slop and good markdown understanding. + No refusals—since it’s a base model, refusals should be minimal to non-existent, though, in early testing, occasional warnings still appear (I assume some were baked into the pre-train). + High-quality private creative writing dataset Mainly to dilute baked-in slop further, but it can actually write some stories, not bad for loss ~8. + Small, high-quality private RP dataset This was done so further tuning for RP will be easier. The dataset was kept small and contains ZERO SLOP, some entries are of 16k token length. + Exceptional adherence to character cards This was done to make it easier for further tunes intended for roleplay. 
+ overrides: + parameters: + model: SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf + files: + - filename: SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf + sha256: 40025eb00d83c9e9393555962962a2dfc5251fe7bd70812835ff0bcc55ecc463 + uri: huggingface://bartowski/SicariusSicariiStuff_Redemption_Wind_24B-GGUF/SicariusSicariiStuff_Redemption_Wind_24B-Q4_K_M.gguf - &mudler url: "github:mudler/LocalAI/gallery/mudler.yaml@master" ### START mudler's LocalAI specific-models name: "LocalAI-llama3-8b-function-call-v0.2" From 7a5912908a6c8ae2791ddc6d5a733181ae02828a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 8 Feb 2025 09:44:34 +0100 Subject: [PATCH 85/85] chore: :arrow_up: Update ggerganov/llama.cpp to `d2fe216fb2fb7ca8627618c9ea3a2e7886325780` (#4780) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a1224035..01d5a14a 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BINARY_NAME=local-ai DETECT_LIBS?=true # llama.cpp versions -CPPLLAMA_VERSION?=8a59053f63fffc24e730cd3ea067760abfe4a919 +CPPLLAMA_VERSION?=d2fe216fb2fb7ca8627618c9ea3a2e7886325780 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
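
Note on the Mirostat default change (PATCH 79 above): the patch only flips the default (`defaultMirostat := 0`); the sampler can still be enabled per model. Below is a minimal, hypothetical model YAML sketch, not taken from these patches: the `name`/`backend`/`parameters.model` shape follows the `gguf` example from the docs updated in PATCH 78, while the `mirostat`, `mirostat_tau` and `mirostat_eta` key names are assumed to correspond to the defaults touched in `core/config/backend_config.go`.

```yaml
# Hypothetical config sketch: opt back in to Mirostat v2 for a single model.
# Key names mirror the defaults set in core/config/backend_config.go; their
# exact spelling and placement in the YAML is an assumption, not shown here.
name: llama
backend: llama
parameters:
  # Relative to the models path
  model: file.gguf
mirostat: 2        # 2 = Mirostat v2 (the old default); 0 now disables it
mirostat_tau: 5.0  # unchanged default (defaultMirostatTAU)
mirostat_eta: 0.1  # unchanged default (defaultMirostatETA)
```

As the commit message notes (see issue #2780), Mirostat can improve output quality at a noticeable throughput cost, so enabling it is a per-model trade-off.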