From bd277162c73c6c0e0eba2011586bd5f32c1be65b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 19 Jul 2024 23:56:58 +0200 Subject: [PATCH 01/69] docs: :arrow_up: update docs version mudler/LocalAI (#2926) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 30b4b614..f54a5e67 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.18.1" + "version": "v2.19.0" } From e75f73bf736d90a519ceea9804b8bbe96b93ec7f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jul 2024 00:10:26 +0200 Subject: [PATCH 02/69] chore: :arrow_up: Update ggerganov/llama.cpp (#2927) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index df13cbfb..0f5ecd00 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7 +CPPLLAMA_VERSION?=87e397d00bdcedd5cbf6dfda06a7b0f302462728 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f9f83791d1997cf0f1f88e7bdbad27190df9a5f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Jul 2024 09:15:48 +0200 Subject: [PATCH 03/69] ci(release): run also on tags Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 92e07326..b2c6c069 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -4,6 +4,8 @@ on: push: branches: - master + tags: + - 'v*' pull_request: env: From 87bd831aba259df70091fe93cafebb830db8ef75 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Jul 2024 10:43:18 +0200 Subject: [PATCH 04/69] docs: add federation (#2929) Signed-off-by: Ettore Di Giacinto --- .../docs/features/distributed_inferencing.md | 91 ++++++++++++------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index abe34373..b7ce41a9 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -5,17 +5,65 @@ weight = 15 url = "/features/distribute/" +++ + +This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. Nodes are automatically discovered and connect via p2p by using a shared token which makes sure the communication is secure and private between the nodes of the network. + +LocalAI supports two modes of distributed inferencing via p2p: + +- **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision. 
- **Worker Mode**: Requests are processed by all the workers, which contribute to the final inference result (by sharing the model weights).

## Usage

Starting LocalAI with `--p2p` generates a shared token for connecting multiple instances, and that's all you need to create AI clusters: no intricate network setup is required.

Simply navigate to the "Swarm" section in the WebUI and follow the on-screen instructions.

For fully shared instances, start LocalAI with `--p2p --federated` and follow the guidance in the Swarm section. This feature is still experimental and should be considered a tech preview.

### Federated mode

Federated mode allows you to launch multiple LocalAI instances and connect them together in a federated network. This mode is useful when you want to distribute the inference load across multiple nodes while keeping a single point of entry for the API. In the Swarm section of the WebUI, you can see the instructions to connect multiple instances together.

![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/19ebd44a-20ff-412c-b92f-cfb8efbe4b21)

To start a LocalAI server in federated mode, run:

```bash
local-ai run --p2p --federated
```

This will generate a token that you can use to connect other LocalAI instances to the network, or that others can use to join the network. If you already have a token, you can specify it using the `TOKEN` environment variable.

To start a load-balanced server that routes the requests to the network, run with the `TOKEN`:

```bash
local-ai federated
```

To see all the available options, run `local-ai federated --help`.

The instructions are displayed in the "Swarm" section of the WebUI, guiding you through the process of connecting multiple instances.

### Workers mode

{{% alert note %}}
This feature is available exclusively with llama-cpp compatible models.

This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829).
{{% /alert %}}

To connect multiple workers to a single LocalAI instance, first start a server in p2p mode:

```bash
local-ai run --p2p
```

Then navigate to the "Swarm" section of the WebUI to see the instructions for connecting multiple workers to the network.

![346663124-1d2324fd-8b55-4fa2-9856-721a467969c2](https://github.com/user-attachments/assets/b8cadddf-a467-49cf-a1ed-8850de95366d)

### Without P2P

To start workers for distributing the computational load, run:

```bash
local-ai worker llama-cpp-rpc
```

Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI.
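As a rough sketch, the manual route might look like the following. The CMake flag, binary path, and port below are assumptions based on the llama.cpp RPC README and may differ between llama.cpp versions, so treat the upstream instructions as authoritative:

```bash
# Hypothetical manual build of llama.cpp's RPC worker; the flag name and
# binary location change between llama.cpp versions, so check the README.
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
cmake -B build -DGGML_RPC=ON
cmake --build build --config Release
# Expose the worker on an address reachable by the LocalAI server:
./build/bin/rpc-server --host 0.0.0.0 --port 50052
```

A worker started this way can then be referenced from the LocalAI server through the `LLAMACPP_GRPC_SERVERS` environment variable described below.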
- -### Starting LocalAI - -To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable: +And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable: ```bash LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run ``` - The workload on the LocalAI server will then be distributed across the specified nodes. -## Peer-to-Peer Networking +Alternatively, you can build the RPC workers/server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI. -![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) +## Manual example (worker) -Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner. - -A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks. - -The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument. - -A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect. - -When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343). - -### Usage +Use the WebUI to guide you in the process of starting new workers. This example shows the manual steps to highlight the process. 1. Start the server with `--p2p`: ```bash ./local-ai run --p2p -# 1:02AM INF loading environment variables from file envFile=.env -# 1:02AM INF Setting logging to info -# 1:02AM INF P2P mode enabled -# 1:02AM INF No token provided, generating one -# 1:02AM INF Generated Token: -# XXXXXXXXXXX -# 1:02AM INF Press a button to proceed +# Get the token in the Swarm section of the WebUI ``` -Copy the displayed token and press Enter. +Copy the token from the WebUI or via API call (e.g., `curl http://localhost:8000/p2p/token`) and save it for later use. To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`. @@ -93,12 +120,14 @@ The server logs should indicate that new workers are being discovered. 3. Start inference as usual on the server initiated in step 1. +![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) + ## Notes - If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file. - Only a single model is supported currently. - Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun. 
- +- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343) ## Environment Variables From 0ee1f8c1cffc4e0abc8b5125e4683ada273dc871 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Jul 2024 10:43:34 +0200 Subject: [PATCH 05/69] ci(Makefile): enable p2p on cross-arm64 builds (#2928) Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0f5ecd00..a8b7a832 100644 --- a/Makefile +++ b/Makefile @@ -421,7 +421,7 @@ else endif dist-cross-linux-arm64: - CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \ + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \ STATIC=true $(MAKE) build mkdir -p release # if BUILD_ID is empty, then we don't append it to the binary name From 46b86f7e6eb96e1146a52928b4fc538523e8ebc8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 20 Jul 2024 16:03:44 +0200 Subject: [PATCH 06/69] models(gallery): add tulu 8b and 70b (#2931) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index c130c570..aef6c239 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3146,6 +3146,36 @@ - filename: L3-8B-Celeste-v1-Q4_K_M.gguf sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317 uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-tulu-2-8b-i1" + icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png + urls: + - https://huggingface.co/allenai/llama-3-tulu-2-8b + - https://huggingface.co/mradermacher/llama-3-tulu-2-8b-i1-GGUF + description: | + Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets. + overrides: + parameters: + model: llama-3-tulu-2-8b.i1-Q4_K_M.gguf + files: + - filename: llama-3-tulu-2-8b.i1-Q4_K_M.gguf + sha256: f859c22bfa64f461e9ffd973dc7ad6a78bb98b1dda6f49abfa416a4022b7e333 + uri: huggingface://mradermacher/llama-3-tulu-2-8b-i1-GGUF/llama-3-tulu-2-8b.i1-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-tulu-2-dpo-70b-i1" + icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png + urls: + - https://huggingface.co/allenai/llama-3-tulu-2-dpo-70b + - https://huggingface.co/mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF + description: | + Tulu is a series of language models that are trained to act as helpful assistants. Llama 3 Tulu V2 8B is a fine-tuned version of Llama 3 that was trained on a mix of publicly available, synthetic and human datasets. 
  overrides:
    parameters:
      model: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
    files:
      - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
        sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
        uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf

From 450dbed820e364f87eede055e898613e14172a1f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 20 Jul 2024 16:16:29 +0200
Subject: [PATCH 07/69] models(gallery): add suzume-orpo (#2932)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index aef6c239..63664070 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -3176,6 +3176,28 @@
      - filename: llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
        sha256: fc309bbdf1e2bdced954c4c8dc1f9a885c547017ee5e750bfde645af89e3d3a5
        uri: huggingface://mradermacher/llama-3-tulu-2-dpo-70b-i1-GGUF/llama-3-tulu-2-dpo-70b.i1-Q4_K_M.gguf
- !!merge <<: *llama3
  license: cc-by-nc-4.0
  name: "suzume-llama-3-8b-multilingual-orpo-borda-top25"
  icon: https://cdn-uploads.huggingface.co/production/uploads/64b63f8ad57e02621dc93c8b/kWQSu02YfgYdUQqv4s5lq.png
  urls:
    - https://huggingface.co/lightblue/suzume-llama-3-8B-multilingual-orpo-borda-top25
    - https://huggingface.co/RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf
  description: |
    This is Suzume ORPO, an ORPO-trained fine-tune of the lightblue/suzume-llama-3-8B-multilingual model using our lightblue/mitsu dataset.

    We have trained several versions of this model using ORPO and so recommend that you use the best-performing model from our tests, lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half.

    Note that this model has a non-commercial license, as we used the Command R and Command R+ models to generate our training data for this model (lightblue/mitsu).

    We are currently working on developing a commercially usable model, so stay tuned for that!
  overrides:
    parameters:
      model: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
    files:
      - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
        sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388
        uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf
- &command-R
  ### START Command-r
  url: "github:mudler/LocalAI/gallery/command-r.yaml@master"

From f505d7ab3f4dabf927413d42691adb37bd46f131 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 20 Jul 2024 16:17:34 +0200
Subject: [PATCH 08/69] models(gallery): add archangel_sft_pythia2-8b (#2933)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml  | 27 +++++++++++++++++++++++++++
 gallery/tuluv2.yaml | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 gallery/tuluv2.yaml

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 63664070..2ef3d46b 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -24,6 +24,33 @@
      - filename: DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
        sha256: 50ec78036433265965ed1afd0667c00c71c12aa70bcf383be462cb8e159db6c0
        uri: huggingface://LoneStriker/DeepSeek-Coder-V2-Lite-Instruct-GGUF/DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf
- name: "archangel_sft_pythia2-8b"
  url: "github:mudler/LocalAI/gallery/tuluv2.yaml@master"
  icon: https://gist.github.com/assets/29318529/fe2d8391-dbd1-4b7e-9dc4-7cb97e55bc06
  license: apache-2.0
  urls:
    - https://huggingface.co/ContextualAI/archangel_sft_pythia2-8b
    - https://huggingface.co/RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf
    - https://github.com/ContextualAI/HALOs
  description: |
    datasets:
    - stanfordnlp/SHP
    - Anthropic/hh-rlhf
    - OpenAssistant/oasst1

    This repo contains the model checkpoints for:
    - model family pythia2-8b
    - optimized with the loss SFT
    - aligned using the SHP, Anthropic HH and Open Assistant datasets.

    Please refer to our [code repository](https://github.com/ContextualAI/HALOs) or [blog](https://contextual.ai/better-cheaper-faster-llm-alignment-with-kto/), which contain instructions for training your own HALOs and links to our model cards.
+ overrides: + parameters: + model: archangel_sft_pythia2-8b.Q4_K_M.gguf + files: + - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf + sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8 + uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf - &qwen2 ## Start QWEN2 url: "github:mudler/LocalAI/gallery/chatml.yaml@master" diff --git a/gallery/tuluv2.yaml b/gallery/tuluv2.yaml new file mode 100644 index 00000000..ca2785a2 --- /dev/null +++ b/gallery/tuluv2.yaml @@ -0,0 +1,43 @@ +--- +name: "tuluv2" + +config_file: | + mmap: true + template: + chat_message: | + <|{{ .RoleName }}|> + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}} + function: | + <|{{ .RoleName }}|> + {{ if .FunctionCall -}} + Function call: + {{ else if eq .RoleName "tool" -}} + Function response: + {{ end -}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + {{ if .FunctionCall -}} + {{toJson .FunctionCall}} + {{ end -}} + chat: | + {{.Input -}} + <|assistant|> + completion: | + {{.Input}} + context_size: 4096 + f16: true + stopwords: + - '<|im_end|>' + - '' + - '<|endoftext|>' From 8667a67695eed2625e361ea3d34b220e8568f783 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jul 2024 23:33:54 +0200 Subject: [PATCH 09/69] docs: :arrow_up: update docs version mudler/LocalAI (#2935) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index f54a5e67..fff9fa0c 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.19.0" + "version": "v2.19.1" } From 86509e6002948c20ca987bc5dbcacc2bef1e65a4 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 20 Jul 2024 23:35:21 +0200 Subject: [PATCH 10/69] chore: :arrow_up: Update ggerganov/llama.cpp (#2936) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a8b7a832..906e8ca5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=87e397d00bdcedd5cbf6dfda06a7b0f302462728 +CPPLLAMA_VERSION?=07283b1a90e1320aae4762c7e03c879043910252 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From ef5e8326c8c4820fb89bb960cc45377c415dff92 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jul 2024 10:31:44 +0200 Subject: [PATCH 11/69] models(gallery): add celestev1.2 (#2937) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2ef3d46b..46f9f7c7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3173,6 +3173,22 @@ - filename: 
L3-8B-Celeste-v1-Q4_K_M.gguf sha256: ed5277719965fb6bbcce7d16742e3bac4a8d5b8f52133261a3402a480cd65317 uri: huggingface://bartowski/L3-8B-Celeste-v1-GGUF/L3-8B-Celeste-v1-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "l3-8b-celeste-v1.2" + icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/Zv__LDTO-nHvpuxPcCgUU.webp + urls: + - https://huggingface.co/mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF + description: | + Trained on LLaMA 3 8B Instruct at 8K context using Reddit Writing Prompts, Opus 15K Instruct an c2 logs cleaned. + + This is a roleplay model any instruction following capabilities outside roleplay contexts are coincidental. + overrides: + parameters: + model: l3-8b-celeste-v1.2-q4_k_m.gguf + files: + - filename: l3-8b-celeste-v1.2-q4_k_m.gguf + sha256: 7752204c0e9f627ff5726eb69bb6114974cafbc934a993ad019abfba62002783 + uri: huggingface://mudler/L3-8B-Celeste-V1.2-Q4_K_M-GGUF/l3-8b-celeste-v1.2-q4_k_m.gguf - !!merge <<: *llama3 name: "llama-3-tulu-2-8b-i1" icon: https://huggingface.co/datasets/allenai/blog-images/resolve/main/tulu-v2/Tulu%20V2%20banner.png From 77ad49333a2ad8a2d7bdf1ad25aba3de93eee720 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jul 2024 21:45:04 +0200 Subject: [PATCH 12/69] models(gallery): add calme-2.3-phi3-4b (#2939) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 46f9f7c7..dc2e5007 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3500,7 +3500,23 @@ - filename: phillama-3.8b-v0.1.Q4_K_M.gguf sha256: da537d352b7aae54bbad0d2cff3e3a1b0e1dc1e1d25bec3aae1d05cf4faee7a2 uri: huggingface://RichardErkhov/raincandy-u_-_phillama-3.8b-v0.1-gguf/phillama-3.8b-v0.1.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "calme-2.3-phi3-4b" + icon: https://huggingface.co/MaziyarPanahi/calme-2.1-phi3-4b/resolve/main/phi-3-instruct.webp + urls: + - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b + - https://huggingface.co/MaziyarPanahi/calme-2.3-phi3-4b-GGUF + description: | + MaziyarPanahi/calme-2.1-phi3-4b + This model is a fine-tune (DPO) of microsoft/Phi-3-mini-4k-instruct model. 
+ overrides: + parameters: + model: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf + files: + - filename: Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf + sha256: 3a23e1052369c080afb925882bd814cbea5ec859894655a7434c3d49e43a6127 + uri: huggingface://MaziyarPanahi/calme-2.3-phi3-4b-GGUF/Phi-3-mini-4k-instruct-v0.3.Q4_K_M.gguf - &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" From 3f7eddb039226c29a3398394c50f35f5c1d8105e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jul 2024 21:51:52 +0200 Subject: [PATCH 13/69] models(gallery): add calme-2.8-qwen2-7b (#2940) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index dc2e5007..3fd8def2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -247,6 +247,21 @@ - filename: Qwen2-Wukong-7B-Q4_K_M.gguf sha256: 6b8ca6649c33fc84d4892ebcff1214f0b34697aced784f0d6d32e284a15943ad uri: huggingface://bartowski/Qwen2-Wukong-7B-GGUF/Qwen2-Wukong-7B-Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "calme-2.8-qwen2-7b" + icon: https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b/resolve/main/qwen2-fine-tunes-maziyar-panahi.webp + urls: + - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b + - https://huggingface.co/MaziyarPanahi/calme-2.8-qwen2-7b-GGUF + description: | + This is a fine-tuned version of the Qwen/Qwen2-7B model. It aims to improve the base model across all benchmarks. + overrides: + parameters: + model: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf + files: + - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf + sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e + uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" From 9c0c11e8a05717c685381650e3b640341faf4683 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jul 2024 21:57:30 +0200 Subject: [PATCH 14/69] models(gallery): add StellarDong-72b (#2941) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 3fd8def2..63e3b49f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -262,6 +262,21 @@ - filename: Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf sha256: 8c1b3efe9fa6ae1b37942ef26473cb4e0aed0f8038b60d4b61e5bffb61e49b7e uri: huggingface://MaziyarPanahi/calme-2.8-qwen2-7b-GGUF/Qwen2-7B-Instruct-v0.8.Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "stellardong-72b-i1" + icon: https://huggingface.co/smelborp/StellarDong-72b/resolve/main/stellardong.png + urls: + - https://huggingface.co/smelborp/StellarDong-72b + - https://huggingface.co/mradermacher/StellarDong-72b-i1-GGUF + description: | + Magnum + Nova = you won't believe how stellar this dong is!! 
+ overrides: + parameters: + model: StellarDong-72b.i1-Q4_K_M.gguf + files: + - filename: StellarDong-72b.i1-Q4_K_M.gguf + sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df + uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" From 19282af0596c0f95ac028ff552647f2c9fa07b32 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 21 Jul 2024 22:01:15 +0200 Subject: [PATCH 15/69] models(gallery): add calme-2.4-llama3-70b (#2942) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 63e3b49f..31af59a3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3271,6 +3271,21 @@ - filename: suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf sha256: ef75a02c5f38e14a8873c7989188dac6974851b4654279fe1921d2c8018cc388 uri: huggingface://RichardErkhov/lightblue_-_suzume-llama-3-8B-multilingual-orpo-borda-top25-gguf/suzume-llama-3-8B-multilingual-orpo-borda-top25.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "calme-2.4-llama3-70b" + icon: https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b/resolve/main/llama-3-merges.webp + urls: + - https://huggingface.co/MaziyarPanahi/calme-2.4-llama3-70b + - https://huggingface.co/mradermacher/calme-2.4-llama3-70b-GGUF + description: | + This model is a fine-tune (DPO) of meta-llama/Meta-Llama-3-70B-Instruct model. + overrides: + parameters: + model: calme-2.4-llama3-70b.Q4_K_M.gguf + files: + - filename: calme-2.4-llama3-70b.Q4_K_M.gguf + sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2 + uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf - &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" From bcd9e153ba1e7efae8f9ec8ab3778310ec1a1818 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 22 Jul 2024 15:39:57 +0200 Subject: [PATCH 16/69] ci(Makefile): reduce binary size by compressing (#2947) Makefile: try to reduce binary size Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 6 ++--- .github/workflows/test.yml | 2 +- Dockerfile | 2 +- Makefile | 47 +++++++++++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index b2c6c069..faed2b81 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -35,7 +35,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk + sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev - name: Install CUDA Dependencies run: | @@ -151,7 +151,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev + sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev - name: Intel Dependencies run: | wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null @@ -252,7 +252,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - 
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache + sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 - name: Build stablediffusion diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 084d016d..e6efe77f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -70,7 +70,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential curl ffmpeg + sudo apt-get install build-essential ccache upx-ucl curl ffmpeg sudo apt-get install -y libgmock-dev curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \ sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \ diff --git a/Dockerfile b/Dockerfile index 78ed4cd3..fcad8343 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,7 +24,7 @@ RUN apt-get update && \ cmake \ curl \ git \ - unzip && \ + unzip upx-ucl && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/Makefile b/Makefile index 906e8ca5..882b6fe6 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM') VERSION?=$(shell git describe --always --tags || echo "dev" ) # go tool nm ./local-ai | grep Commit -LD_FLAGS?= +LD_FLAGS?=-s -w override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)" override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)" @@ -72,6 +72,14 @@ WHITE := $(shell tput -Txterm setaf 7) CYAN := $(shell tput -Txterm setaf 6) RESET := $(shell tput -Txterm sgr0) +UPX?= +# check if upx exists +ifeq (, $(shell which upx)) + UPX= +else + UPX=$(shell which upx) +endif + # Default Docker bridge IP E2E_BRIDGE_IP?=172.17.0.1 @@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project $(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET}) $(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET}) $(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET}) + $(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET}) ifneq ($(BACKEND_LIBS),) $(MAKE) backend-assets/lib cp -f $(BACKEND_LIBS) backend-assets/lib/ @@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/bert-embeddings +endif backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/gpt4all +endif backend-assets/grpc/huggingface: backend-assets/grpc $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/huggingface +endif 
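# NOTE: each Go-built backend target above repeats the same guard. UPX is
# detected once near the top of this Makefile (left empty when no `upx`
# binary is on PATH), so compression is skipped silently where unavailable.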
backend/cpp/llama/llama.cpp: LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp @@ -765,6 +783,9 @@ else echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined." LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server endif +ifneq ($(UPX),) + $(UPX) backend/cpp/${VARIANT}/grpc-server +endif # This target is for manually building a variant with-auto detected flags backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp @@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama. backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc mkdir -p backend-assets/util/ cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server +ifneq ($(UPX),) + $(UPX) backend-assets/util/llama-cpp-rpc-server +endif backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/llama-ggml +endif backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/piper +endif backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/rwkv +endif backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/stablediffusion +endif backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/tinydream +endif backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/whisper +endif backend-assets/grpc/local-store: backend-assets/grpc $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store 
./backend/go/stores/ +ifneq ($(UPX),) + $(UPX) backend-assets/grpc/local-store +endif grpcs: prepare $(GRPC_BACKENDS) From 7d61de63ae1fca11d020359657938fc69af64560 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 22 Jul 2024 15:40:34 +0200 Subject: [PATCH 17/69] fix: pin setuptools 69.5.1 (#2949) pin setuptools 69.5.1 --- backend/python/sentencetransformers/requirements-intel.txt | 2 +- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- backend/python/transformers/requirements-intel.txt | 1 - backend/python/transformers/requirements.txt | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 635b4c31..95d4848c 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 635b4c31..95d4848c 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 635b4c31..8fc18a0e 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,4 +2,3 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 76066f50..40e87073 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -6,4 +6,4 @@ torch certifi intel-extension-for-transformers bitsandbytes -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 From 153e97715543188212a366eeccecf112f5115e8c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 22 Jul 2024 17:35:10 +0200 Subject: [PATCH 18/69] Update distributed_inferencing.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/distributed_inferencing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index b7ce41a9..1ab3fa55 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -11,7 +11,7 @@ This functionality enables LocalAI to distribute inference requests across multi LocalAI supports two modes of distributed inferencing via p2p: - **Federated Mode**: Requests are shared between the cluster and routed to a single worker node in the network based on the load balancer's decision. 
-- **Worker Mode**: Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights). +- **Worker Mode** (aka "model sharding" or "splitting weights"): Requests are processed by all the workers which contributes to the final inference result (by sharing the model weights). ## Usage From 3dc601c4704154450e84f7cb31bf896ebf0f29d7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:04:41 +0200 Subject: [PATCH 19/69] chore: :arrow_up: Update ggerganov/llama.cpp (#2943) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 882b6fe6..b7df2486 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=07283b1a90e1320aae4762c7e03c879043910252 +CPPLLAMA_VERSION?=45f2c19cc57286eead7b232ce8028273a817aa4d # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From a6b92af875987e7e62cc0c530abefed89a015c88 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 21:34:12 +0000 Subject: [PATCH 20/69] chore(deps): Bump grpcio from 1.64.1 to 1.65.1 in /backend/python/openvoice (#2956) chore(deps): Bump grpcio in /backend/python/openvoice Bumps [grpcio](https://github.com/grpc/grpc) from 1.64.1 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.64.1...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/openvoice/requirements-intel.txt | 2 +- backend/python/openvoice/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index b0551187..bad088a9 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -2,7 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -grpcio==1.64.1 +grpcio==1.65.1 protobuf librosa==0.9.1 faster-whisper==1.0.3 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index 07ba879a..86d16ec2 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.65.0 +grpcio==1.65.1 protobuf librosa faster-whisper From 1a75546b272cb1e3deff1315b734ae39afb2bd71 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 21:41:06 +0000 Subject: [PATCH 21/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/sentencetransformers (#2955) chore(deps): Bump grpcio in /backend/python/sentencetransformers Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. 
- [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/sentencetransformers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt index ac21d449..4ef4a28b 100644 --- a/backend/python/sentencetransformers/requirements.txt +++ b/backend/python/sentencetransformers/requirements.txt @@ -1,6 +1,6 @@ accelerate sentence-transformers==3.0.1 transformers -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi \ No newline at end of file From f4ed47bf956bdf39e39d71e6a25d144fb4da0cdd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 21:47:54 +0000 Subject: [PATCH 22/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/bark (#2951) Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/bark/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 215b3d35..d3f9f52b 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,6 +1,6 @@ accelerate bark==0.1.5 -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi transformers \ No newline at end of file From 29669791615befa7e41c6862c6fe25b273ef729b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:26:35 +0000 Subject: [PATCH 23/69] chore(deps): Bump docs/themes/hugo-theme-relearn from `1b2e139` to `7aec99b` (#2952) chore(deps): Bump docs/themes/hugo-theme-relearn Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `1b2e139` to `7aec99b`. - [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases) - [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/1b2e139512106f8074ac7d4a884135d159720cc4...7aec99b38dc2668c6139bf71855535ace41c123c) --- updated-dependencies: - dependency-name: docs/themes/hugo-theme-relearn dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs/themes/hugo-theme-relearn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn index 1b2e1395..7aec99b3 160000 --- a/docs/themes/hugo-theme-relearn +++ b/docs/themes/hugo-theme-relearn @@ -1 +1 @@ -Subproject commit 1b2e139512106f8074ac7d4a884135d159720cc4 +Subproject commit 7aec99b38dc2668c6139bf71855535ace41c123c From d3166e8571c576be78139b8dfffb77e0de8da2fc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:49:29 +0000 Subject: [PATCH 24/69] chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/langchain/langchainpy-localai-example (#2959) chore(deps): Bump langchain Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 01a75d46..a0578a09 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -10,7 +10,7 @@ debugpy==1.8.2 frozenlist==1.4.1 greenlet==3.0.3 idna==3.7 -langchain==0.2.8 +langchain==0.2.10 langchain-community==0.2.7 marshmallow==3.21.3 marshmallow-enum==1.5.1 From 8ec7a0a407d240bce303dd2d837602c4a61dd4af Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:49:39 +0000 Subject: [PATCH 25/69] chore(deps): Bump numpy from 1.26.4 to 2.0.1 in /examples/langchain/langchainpy-localai-example (#2958) chore(deps): Bump numpy Bumps [numpy](https://github.com/numpy/numpy) from 1.26.4 to 2.0.1. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/RELEASE_WALKTHROUGH.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.26.4...v2.0.1) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index a0578a09..25d74716 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -17,7 +17,7 @@ marshmallow-enum==1.5.1 multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.1 -numpy==1.26.4 +numpy==2.0.1 openai==1.35.13 openapi-schema-pydantic==1.2.4 packaging>=23.2 From 9fc09b32cfec5ccd693ca9ac8592c45b305bbaec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:50:41 +0000 Subject: [PATCH 26/69] chore(deps): Bump sqlalchemy from 2.0.30 to 2.0.31 in /examples/langchain/langchainpy-localai-example (#2957) chore(deps): Bump sqlalchemy Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.30 to 2.0.31. - [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases) - [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst) - [Commits](https://github.com/sqlalchemy/sqlalchemy/commits) --- updated-dependencies: - dependency-name: sqlalchemy dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 25d74716..522dbe14 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -24,7 +24,7 @@ packaging>=23.2 pydantic==2.8.2 PyYAML==6.0.1 requests==2.32.3 -SQLAlchemy==2.0.30 +SQLAlchemy==2.0.31 tenacity==8.5.0 tqdm==4.66.4 typing-inspect==0.9.0 From a1bc2e977109379b2e89ab1c1c2d6f9ea646eb01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 00:08:22 +0000 Subject: [PATCH 27/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/vllm (#2964) Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/vllm/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index 986a4d55..7c612a2f 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,6 +1,6 @@ accelerate vllm -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi transformers From 824cc816ea9e990100fcb533733758184bab9ebe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 00:58:30 +0000 Subject: [PATCH 28/69] chore(deps): Bump llama-index from 0.10.55 to 0.10.56 in /examples/chainlit (#2966) chore(deps): Bump llama-index in /examples/chainlit Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.55 to 0.10.56. - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.10.55...v0.10.56) --- updated-dependencies: - dependency-name: llama-index dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/chainlit/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt index 116b7b61..cac24528 100644 --- a/examples/chainlit/requirements.txt +++ b/examples/chainlit/requirements.txt @@ -1,4 +1,4 @@ -llama_index==0.10.55 +llama_index==0.10.56 requests==2.32.3 weaviate_client==4.6.5 transformers From b555b64616367db6e288706f06541dc88ddc8cfa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 01:07:42 +0000 Subject: [PATCH 29/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/common/template (#2963) chore(deps): Bump grpcio in /backend/python/common/template Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/common/template/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index c762c4d6..8d1e3151 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,2 +1,2 @@ -grpcio==1.65.0 +grpcio==1.65.1 protobuf \ No newline at end of file From ede352256be74b8f63e71c07c542d3ce3900b5bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 01:17:19 +0000 Subject: [PATCH 30/69] chore(deps): Bump weaviate-client from 4.6.5 to 4.6.7 in /examples/chainlit (#2965) chore(deps): Bump weaviate-client in /examples/chainlit Bumps [weaviate-client](https://github.com/weaviate/weaviate-python-client) from 4.6.5 to 4.6.7. - [Release notes](https://github.com/weaviate/weaviate-python-client/releases) - [Changelog](https://github.com/weaviate/weaviate-python-client/blob/main/docs/changelog.rst) - [Commits](https://github.com/weaviate/weaviate-python-client/compare/v4.6.5...v4.6.7) --- updated-dependencies: - dependency-name: weaviate-client dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/chainlit/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt index cac24528..13415f11 100644 --- a/examples/chainlit/requirements.txt +++ b/examples/chainlit/requirements.txt @@ -1,6 +1,6 @@ llama_index==0.10.56 requests==2.32.3 -weaviate_client==4.6.5 +weaviate_client==4.6.7 transformers torch chainlit From 99324eeef0cb9c29cb68958b9ff599a4e1e768ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 02:39:44 +0000 Subject: [PATCH 31/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/transformers (#2970) chore(deps): Bump grpcio in /backend/python/transformers Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 40e87073..55925b32 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,6 +1,6 @@ accelerate transformers -grpcio==1.65.0 +grpcio==1.65.1 protobuf torch certifi From 8385eb2a596e6a41dd84f582c17f25bbae55d2c4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 03:42:48 +0000 Subject: [PATCH 32/69] chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/functions (#2973) Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0. - [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/functions/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index 481af898..d5e8f2c5 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ langchain==0.2.8 -openai==1.35.13 +openai==1.37.0 From 2f9f04b26097106eed81913485ab550948dfdd77 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 03:47:26 +0000 Subject: [PATCH 33/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/diffusers (#2969) chore(deps): Bump grpcio in /backend/python/diffusers Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/diffusers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index c607187e..6f04d677 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -3,7 +3,7 @@ accelerate compel peft diffusers -grpcio==1.65.0 +grpcio==1.65.1 opencv-python pillow protobuf From 7ab3217df0d11d422138a9290b34407e43bcfae5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 04:03:28 +0000 Subject: [PATCH 34/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/exllama2 (#2971) chore(deps): Bump grpcio in /backend/python/exllama2 Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. 
- [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/exllama2/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 62c7117a..6aae273c 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,5 +1,5 @@ accelerate -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi torch From fb574434a4e0b62f6c03b20d6226dc362f9a3bdf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 04:40:27 +0000 Subject: [PATCH 35/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/rerankers (#2974) chore(deps): Bump grpcio in /backend/python/rerankers Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/rerankers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 1b437654..8b2ad4d0 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,6 +1,6 @@ accelerate rerankers[transformers] -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi transformers \ No newline at end of file From 385d8dc29b69269064dcca7cbc60b39496e6671c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 06:15:50 +0000 Subject: [PATCH 36/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/coqui (#2980) Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/coqui/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index d7dd07e4..e1cddaa3 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,6 +1,6 @@ accelerate TTS==0.22.0 -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi transformers \ No newline at end of file From bbb1dc2ae085e41a46ab60e2b3c8af9eeeeb749b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 06:33:45 +0000 Subject: [PATCH 37/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/parler-tts (#2982) chore(deps): Bump grpcio in /backend/python/parler-tts Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/parler-tts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index c3706051..147cad9a 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,5 +1,5 @@ accelerate -grpcio==1.65.0 +grpcio==1.65.1 protobuf torch git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 From 6ec593c23776a6dc645027a99051859f15359920 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 06:50:45 +0000 Subject: [PATCH 38/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/vall-e-x (#2981) chore(deps): Bump grpcio in /backend/python/vall-e-x Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/vall-e-x/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index ac891fe7..d1d0583e 100644 --- a/backend/python/vall-e-x/requirements.txt +++ b/backend/python/vall-e-x/requirements.txt @@ -1,4 +1,4 @@ accelerate -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi \ No newline at end of file From 36789e9ead9ed4e3caf10f93b13d52aa0b9f35f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 07:34:26 +0000 Subject: [PATCH 39/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/transformers-musicgen (#2990) chore(deps): Bump grpcio in /backend/python/transformers-musicgen Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers-musicgen/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt index 8a969c34..8ffa3c31 100644 --- a/backend/python/transformers-musicgen/requirements.txt +++ b/backend/python/transformers-musicgen/requirements.txt @@ -1,6 +1,6 @@ accelerate transformers -grpcio==1.65.0 +grpcio==1.65.1 protobuf torch scipy==1.14.0 From 9c331239d9a9b2abc447d1f504a9e4c8f14656c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 08:16:38 +0000 Subject: [PATCH 40/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/autogptq (#2984) chore(deps): Bump grpcio in /backend/python/autogptq Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/autogptq/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index e416adb2..7a1bf85f 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,6 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.65.0 +grpcio==1.65.1 protobuf torch certifi From 5e5037f10d87510e76cff558d1048cfae01a4828 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Jul 2024 10:42:51 +0200 Subject: [PATCH 41/69] feat(p2p): warn the user to start with --p2p (#2993) Signed-off-by: Ettore Di Giacinto --- core/http/views/p2p.html | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html index 0396924e..a8c51310 100644 --- a/core/http/views/p2p.html +++ b/core/http/views/p2p.html @@ -16,7 +16,16 @@
LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your devices or with your friends!
- + + {{ if and .IsP2PEnabled (eq .P2PToken "") }} +
+

Warning: P2P mode is disabled or no token was specified

+

You have to enable P2P mode by starting LocalAI with --p2p. Restarting the server with --p2p automatically generates a new token that can be used to discover other nodes. If you already have a token, specify it with export TOKEN=".." + Check out the documentation for more information. +

+
+ {{ else }} +
@@ -128,7 +137,8 @@
- + + {{ end }} From e3cd11cc0a1d84066d774f094192dc65932723c0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 09:28:33 +0000 Subject: [PATCH 42/69] chore(deps): Bump llama-index from 0.10.55 to 0.10.56 in /examples/langchain-chroma (#2986) chore(deps): Bump llama-index in /examples/langchain-chroma Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.55 to 0.10.56. - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.10.55...v0.10.56) --- updated-dependencies: - dependency-name: llama-index dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 0e6d8c4d..7a316c24 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.8 openai==1.35.13 chromadb==0.5.4 -llama-index==0.10.55 \ No newline at end of file +llama-index==0.10.56 \ No newline at end of file From 39de3cf21dad01110c677e88559a2f6ab1990f3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 10:15:55 +0000 Subject: [PATCH 43/69] chore(deps): Bump grpcio from 1.65.0 to 1.65.1 in /backend/python/mamba (#2989) Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.0 to 1.65.1. - [Release notes](https://github.com/grpc/grpc/releases) - [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md) - [Commits](https://github.com/grpc/grpc/compare/v1.65.0...v1.65.1) --- updated-dependencies: - dependency-name: grpcio dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/mamba/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt index e431ddfe..2aac2cda 100644 --- a/backend/python/mamba/requirements.txt +++ b/backend/python/mamba/requirements.txt @@ -1,6 +1,6 @@ causal-conv1d==1.4.0 mamba-ssm==2.2.2 -grpcio==1.65.0 +grpcio==1.65.1 protobuf certifi transformers \ No newline at end of file From b53947a5bb42deb24d0805f4d677484ecabb78cd Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 23 Jul 2024 12:33:42 +0200 Subject: [PATCH 44/69] chore: :arrow_up: Update ggerganov/llama.cpp (#2992) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b7df2486..634d78a2 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=45f2c19cc57286eead7b232ce8028273a817aa4d +CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 703cd08f01ad105ea1677956fd0c3b24690271ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:00:46 +0000 Subject: [PATCH 45/69] chore(deps): Bump langchain-community from 0.2.7 to 0.2.9 in /examples/langchain/langchainpy-localai-example (#2960) chore(deps): Bump langchain-community Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.2.7 to 0.2.9. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.2.7...langchain-community==0.2.9) --- updated-dependencies: - dependency-name: langchain-community dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 522dbe14..6420d50e 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -11,7 +11,7 @@ frozenlist==1.4.1 greenlet==3.0.3 idna==3.7 langchain==0.2.10 -langchain-community==0.2.7 +langchain-community==0.2.9 marshmallow==3.21.3 marshmallow-enum==1.5.1 multidict==6.0.5 From 0314b37cd83b40184739ddd15af5e575c1e4045d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:01:00 +0000 Subject: [PATCH 46/69] chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/langchain/langchainpy-localai-example (#2961) chore(deps): Bump openai Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0. 
- [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 6420d50e..0e03d543 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -18,7 +18,7 @@ multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.1 numpy==2.0.1 -openai==1.35.13 +openai==1.37.0 openapi-schema-pydantic==1.2.4 packaging>=23.2 pydantic==2.8.2 From ead69a116ae27688846111819cd4324d06f149b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:51:05 +0000 Subject: [PATCH 47/69] chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/functions (#2975) Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/functions/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index d5e8f2c5..f8afacdc 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ -langchain==0.2.8 +langchain==0.2.10 openai==1.37.0 From c7f0743f4815b4168ca096afb0408407391d3cf5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 12:26:46 +0000 Subject: [PATCH 48/69] chore(deps): Bump openai from 1.35.13 to 1.37.0 in /examples/langchain-chroma (#2988) chore(deps): Bump openai in /examples/langchain-chroma Bumps [openai](https://github.com/openai/openai-python) from 1.35.13 to 1.37.0. - [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.35.13...v1.37.0) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 7a316c24..17ed9c9a 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.8 -openai==1.35.13 +openai==1.37.0 chromadb==0.5.4 llama-index==0.10.56 \ No newline at end of file From 1c96e0b79ec25421845c917aa0de9be2603f983d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 14:34:07 +0000 Subject: [PATCH 49/69] chore(deps): Bump langchain from 0.2.8 to 0.2.10 in /examples/langchain-chroma (#2987) chore(deps): Bump langchain in /examples/langchain-chroma Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.8 to 0.2.10. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.8...langchain==0.2.10) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 17ed9c9a..89ca2db7 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.2.8 +langchain==0.2.10 openai==1.37.0 chromadb==0.5.4 llama-index==0.10.56 \ No newline at end of file From a9757fb0571668560cef55892d8661f35c961ebc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 23 Jul 2024 23:35:31 +0200 Subject: [PATCH 50/69] fix(cuda): downgrade to 12.0 to increase compatibility range (#2994) * fix(cuda): downgrade to 12.0 to increase compatibility range Signed-off-by: Ettore Di Giacinto * improve messaging Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/image-pr.yml | 4 ++-- .github/workflows/image.yml | 8 ++++---- .github/workflows/release.yaml | 1 - Dockerfile | 2 +- Makefile | 2 +- pkg/model/initializers.go | 6 +++--- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 290f8793..8ebaa1b2 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -47,7 +47,7 @@ jobs: # makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "4" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg' @@ -120,7 +120,7 @@ jobs: # makeflags: "--jobs=3 --output-sync=target" # - build-type: 'cublas' # cuda-major-version: "12" - # cuda-minor-version: "4" + # cuda-minor-version: "0" # platforms: 'linux/amd64' # tag-latest: 'false' # tag-suffix: '-cublas-cuda12-ffmpeg-core' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 73899e15..395d7761 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -75,7 +75,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - 
cuda-minor-version: "4" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12' @@ -100,7 +100,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "4" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-cublas-cuda12-ffmpeg' @@ -285,7 +285,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "4" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-core' @@ -307,7 +307,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "4" + cuda-minor-version: "0" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg-core' diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index faed2b81..5c883db4 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -31,7 +31,6 @@ jobs: with: go-version: '1.21.x' cache: false - - name: Dependencies run: | sudo apt-get update diff --git a/Dockerfile b/Dockerfile index fcad8343..a0feadd9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=12 -ARG CUDA_MINOR_VERSION=4 +ARG CUDA_MINOR_VERSION=0 ENV BUILD_TYPE=${BUILD_TYPE} diff --git a/Makefile b/Makefile index 634d78a2..297938ae 100644 --- a/Makefile +++ b/Makefile @@ -480,7 +480,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests . + docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 901b4d99..88a08f28 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string { grpcProcess = p foundCUDA = true } else { - log.Info().Msgf("GPU device found but no CUDA backend present") + log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support") } } if strings.Contains(gpu.String(), "amd") { @@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string { grpcProcess = p foundAMDGPU = true } else { - log.Info().Msgf("GPU device found but no HIPBLAS backend present") + log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. 
You can ignore this message if you are using container with HIPBLAS support") } } if strings.Contains(gpu.String(), "intel") { @@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string { grpcProcess = p foundIntelGPU = true } else { - log.Info().Msgf("GPU device found but no Intel backend present") + log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support") } } } From 89484efaed97ee64ae88d33051cacd3bbd2b8ae9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Jul 2024 12:27:49 +0200 Subject: [PATCH 51/69] docs: update distributed_inferencing.md Signed-off-by: Ettore Di Giacinto --- .../docs/features/distributed_inferencing.md | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index 1ab3fa55..2de7ae3c 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -122,12 +122,6 @@ The server logs should indicate that new workers are being discovered. ![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) -## Notes - -- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file. -- Only a single model is supported currently. -- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun. -- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343) ## Environment Variables @@ -138,3 +132,20 @@ There are options that can be tweaked or parameters that can be set using enviro | **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) | | **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management | | **LOCALAI_P2P_TOKEN** | Set the token for the p2p network | + +## Architecture + +LocalAI uses https://github.com/libp2p/go-libp2p under the hood, the same project powering IPFS. Unlike other frameworks, LocalAI uses peer-to-peer networking without a single master server; instead, it uses pub/sub gossip and ledger functionalities to achieve consensus across different peers. + +[EdgeVPN](https://github.com/mudler/edgevpn) is used as a library to establish the network and expose the ledger functionality under a shared token, which eases automatic discovery and keeps peer-to-peer networks separate and private. + +In worker mode, the model weights are split across the workers in proportion to their available memory; in federation mode, each request is routed to a single node, which has to load the model fully. + +## Notes + +- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file. +- Only a single model is supported currently. +- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+- For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343) + + From bd900945f7fec40ab3398c6a34693ca271eb556f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Jul 2024 12:35:52 +0200 Subject: [PATCH 52/69] fix(llama.cpp): do not set lora_base anymore (#2999) Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama/grpc-server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 1cff6b8a..cb5c85f1 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request, // get the directory of modelfile std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\")); params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor)); - params.lora_base = model_dir + "/"+request->lorabase(); } params.use_mlock = request->mlock(); params.use_mmap = request->mmap(); From 9fee46207ac4dd73354a58f47a58cb4e691f4773 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Jul 2024 12:48:14 +0200 Subject: [PATCH 53/69] models(gallery): add llama3.1 70b and 8b (#3000) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 31af59a3..cc654885 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1,4 +1,44 @@ --- +## LLama3.1 +- &llama31 + url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png + name: "llama3-8b-instruct" + license: llama3.1 + description: | + The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. + + Model developer: Meta + + Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+ urls: + - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct + - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3.1 + overrides: + parameters: + model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf + files: + - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf + sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815 + uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "meta-llama-3.1-70b-instruct" + urls: + - https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct + - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF + overrides: + parameters: + model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf + files: + - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf + sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab + uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf ## Deepseek - &deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 0802895cd20cf0ae482c66ef2d83a2e5244f5e27 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Jul 2024 14:32:54 +0200 Subject: [PATCH 54/69] Update index.yaml Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index cc654885..fa61393c 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3,7 +3,7 @@ - &llama31 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png - name: "llama3-8b-instruct" + name: "meta-llama-3.1-8b-instruct" license: llama3.1 description: | The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 
From 80ae919dbe1c8e8022a9daecbdabf00482cbdd38 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:37:08 +0200 Subject: [PATCH 55/69] chore: :arrow_up: Update ggerganov/llama.cpp (#2995) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 297938ae..55ef43c6 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=081fe431aa8fb6307145c4feb3eed4f48cab19f8 +CPPLLAMA_VERSION?=79167d9e49aef9caa98e13ee7ca067ec9f88b4b5 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4a69ef305245d5e5172de247c34e2a39b73c06f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 24 Jul 2024 23:40:08 +0200 Subject: [PATCH 56/69] models(gallery): add llama3.1-claude (#3005) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fa61393c..870242f0 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -39,6 +39,20 @@ - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "meta-llama-3.1-8b-claude-imat" + urls: + - https://huggingface.co/Undi95/Meta-Llama-3.1-8B-Claude + - https://huggingface.co/InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF + description: | + Meta-Llama-3.1-8B-Claude-iMat-GGUF: Quantized from Meta-Llama-3.1-8B-Claude fp16. Weighted quantizations were creating using fp16 GGUF and groups_merged.txt in 88 chunks and n_ctx=512. Static fp16 will also be included in repo. For a brief rundown of iMatrix quant performance, please see this PR. All quants are verified working prior to uploading to repo for your safety and convenience. 
+ overrides: + parameters: + model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf + files: + - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf + sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7 + uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf ## Deepseek - &deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 9031d2b9eb549502139c2b73d5d5b0f77f703cff Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:32:10 +0200 Subject: [PATCH 57/69] docs: :arrow_up: update docs version mudler/LocalAI (#3002) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index fff9fa0c..efda370f 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.19.1" + "version": "v2.19.2" } From 717cc6fe1a5a27b0335305c78cea5109bcf158da Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 25 Jul 2024 00:47:38 +0200 Subject: [PATCH 58/69] chore: :arrow_up: Update ggerganov/llama.cpp (#3003) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 55ef43c6..f1862aef 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=79167d9e49aef9caa98e13ee7ca067ec9f88b4b5 +CPPLLAMA_VERSION?=68504f0970db5a3602d176953690f503059906b1 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5eda7f578d232c0a8151e18e679cfb64c249c2de Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Jul 2024 08:41:00 +0200 Subject: [PATCH 59/69] refactor: break down json grammar parser in different files (#3004) * refactor: break down json grammar parser in different files Signed-off-by: Ettore Di Giacinto * fix: patch to `refactor_grammars` - propagate errors (#3006) propagate errors around Signed-off-by: Dave Lee --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Dave Lee Co-authored-by: Dave --- core/http/endpoints/openai/chat.go | 10 +- pkg/functions/bnf_rules.go | 47 ++++++ pkg/functions/function_structure.go | 25 +++ pkg/functions/functions.go | 17 ++ pkg/functions/functions_suite_test.go | 14 +- pkg/functions/grammar_json_schema.go | 179 +++++++--------------- pkg/functions/grammar_json_schema_test.go | 51 +++--- pkg/functions/json_mode.go | 28 ++++ 8 files changed, 218 insertions(+), 153 deletions(-) create mode 100644 pkg/functions/bnf_rules.go create mode 100644 pkg/functions/function_structure.go create mode 100644 pkg/functions/json_mode.go diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index f63a9913..c7afb7bf 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -226,9 +226,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Update 
input grammar jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey) - config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + if err == nil { + config.Grammar = g + } case input.JSONFunctionGrammarObject != nil: - config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + if err == nil { + config.Grammar = g + } default: // Force picking one of the functions by the request if config.FunctionToCall() != "" { diff --git a/pkg/functions/bnf_rules.go b/pkg/functions/bnf_rules.go new file mode 100644 index 00000000..13aa3654 --- /dev/null +++ b/pkg/functions/bnf_rules.go @@ -0,0 +1,47 @@ +package functions + +import "regexp" + +var ( + PRIMITIVE_RULES = map[string]string{ + "boolean": `("true" | "false") space`, + "number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`, + "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`, + "string": `"\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space`, + // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here, + // however, if we don't have it, the grammar will be ambiguous and + // empirically results are way worse. + "freestring": `( + [^\x00] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* space`, + "null": `"null" space`, + } + + INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`) + GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`) + GRAMMAR_LITERAL_ESCAPES = map[string]string{ + "\r": `\r`, + "\n": `\n`, + `"`: `\"`, + } +) + +const ( + SPACE_RULE = `" "?` + + arrayNewLines = `arr ::= + "[\n" ( + realvalue + (",\n" realvalue)* + )? "]"` + + array = `arr ::= + "[" ( + realvalue + ("," realvalue)* + )? "]"` +) diff --git a/pkg/functions/function_structure.go b/pkg/functions/function_structure.go new file mode 100644 index 00000000..62cc68fa --- /dev/null +++ b/pkg/functions/function_structure.go @@ -0,0 +1,25 @@ +package functions + +import "encoding/json" + +type Item struct { + Type string `json:"type"` + Properties map[string]interface{} `json:"properties"` +} + +type JSONFunctionStructure struct { + OneOf []Item `json:"oneOf,omitempty"` + AnyOf []Item `json:"anyOf,omitempty"` + Defs map[string]interface{} `json:"$defs,omitempty"` +} + +func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) (string, error) { + grammarOpts := &GrammarOption{} + grammarOpts.Apply(options...) + + dat, err := json.Marshal(j) + if err != nil { + return "", err + } + return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...) 
+} diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index 49e9fc93..2690b8ec 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -18,6 +18,15 @@ type Function struct { } type Functions []Function +type FunctionName struct { + Const string `json:"const"` +} + +type Argument struct { + Type string `json:"type"` + Properties map[string]interface{} `json:"properties"` +} + type Tool struct { Type string `json:"type"` Function Function `json:"function,omitempty"` @@ -86,3 +95,11 @@ func (f Functions) Select(name string) Functions { return funcs } + +func jsonString(v interface{}) (string, error) { + b, err := json.Marshal(v) + if err != nil { + return "", err + } + return string(b), nil +} diff --git a/pkg/functions/functions_suite_test.go b/pkg/functions/functions_suite_test.go index 8964b1c8..59a90ab0 100644 --- a/pkg/functions/functions_suite_test.go +++ b/pkg/functions/functions_suite_test.go @@ -1,8 +1,10 @@ -package functions +package functions_test import ( "testing" + . "github.com/mudler/LocalAI/pkg/functions" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -11,3 +13,13 @@ func TestGrammar(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Grammar test suite") } + +func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { + property := map[string]interface{}{} + property[field1] = FunctionName{Const: name} + property[field2] = Argument{ + Type: "object", + Properties: properties, + } + return property +} diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go index 7356d01d..5ffc0ba5 100644 --- a/pkg/functions/grammar_json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -5,70 +5,12 @@ package functions import ( "encoding/json" "fmt" - "regexp" "sort" "strings" "github.com/mudler/LocalAI/pkg/utils" ) -const ( - JSONBNF = `root ::= object -value ::= object | array | string | number | ("true" | "false" | "null") ws - -object ::= - "{" ws ( - string ":" ws value - ("," ws string ":" ws value)* - )? "}" ws - -array ::= - "[" ws ( - value - ("," ws value)* - )? "]" ws - -string ::= - "\"" ( - [^"\\] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes - )* "\"" ws - -number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws - -ws ::= ([ \t\n] ws)?` -) - -var ( - SPACE_RULE = `" "?` - - PRIMITIVE_RULES = map[string]string{ - "boolean": `("true" | "false") space`, - "number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`, - "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`, - "string": `"\"" ( - [^"\\] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) - )* "\"" space`, - // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here, - // however, if we don't have it, the grammar will be ambiguous and - // empirically results are way worse. 
- "freestring": `( - [^\x00] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) - )* space`, - "null": `"null" space`, - } - - INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`) - GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`) - GRAMMAR_LITERAL_ESCAPES = map[string]string{ - "\r": `\r`, - "\n": `\n`, - `"`: `\"`, - } -) - type JSONSchemaConverter struct { propOrder map[string]int rules map[string]string @@ -90,11 +32,15 @@ func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter { } } -func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) string { - escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jsonString(literal), func(match string) string { +func (sc *JSONSchemaConverter) formatLiteral(literal interface{}) (string, error) { + jLiteral, err := jsonString(literal) + if err != nil { + return "", err + } + escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string { return GRAMMAR_LITERAL_ESCAPES[match] }) - return fmt.Sprintf(`"%s"`, escaped) + return fmt.Sprintf(`"%s"`, escaped), nil } func (sc *JSONSchemaConverter) addRule(name, rule string) string { @@ -114,18 +60,6 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string { return key } -const arrayNewLines = `arr ::= - "[\n" ( - realvalue - (",\n" realvalue)* - )? "]"` - -const array = `arr ::= - "[" ( - realvalue - ("," realvalue)* - )? "]"` - func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string { grammarOpts := &GrammarOption{} @@ -210,7 +144,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) return strings.Join(lines, "\n") } -func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) string { +func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) { st, existType := schema["type"] var schemaType string if existType { @@ -229,31 +163,44 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, if oneOfExists { for i, altSchema := range oneOfSchemas { - alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema) + alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema) + if err != nil { + return "", err + } alternatives = append(alternatives, alternative) } } else if anyOfExists { for i, altSchema := range anyOfSchemas { - alternative := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema) + alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema) + if err != nil { + return "", err + } alternatives = append(alternatives, alternative) } } rule := strings.Join(alternatives, " | ") - return sc.addRule(ruleName, rule) + return sc.addRule(ruleName, rule), nil } else if ref, exists := schema["$ref"].(string); exists { referencedSchema := sc.resolveReference(ref, rootSchema) return sc.visit(referencedSchema, name, rootSchema) } else if constVal, exists := schema["const"]; exists { - return sc.addRule(ruleName, sc.formatLiteral(constVal)) + literal, err := sc.formatLiteral((constVal)) + if err != nil { + return "", err + } + return sc.addRule(ruleName, literal), nil } else if enumVals, exists := schema["enum"].([]interface{}); exists { var enumRules []string for _, enumVal := range enumVals { - 
enumRule := sc.formatLiteral(enumVal) + enumRule, err := sc.formatLiteral(enumVal) + if err != nil { + return "", err + } enumRules = append(enumRules, enumRule) } rule := strings.Join(enumRules, " | ") - return sc.addRule(ruleName, rule) + return sc.addRule(ruleName, rule), nil } else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists { propOrder := sc.propOrder var propPairs []struct { @@ -283,21 +230,30 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, for i, propPair := range propPairs { propName := propPair.propName propSchema := propPair.propSchema - propRuleName := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema) - + propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema) + if err != nil { + return "", err + } + lPropName, err := sc.formatLiteral(propName) + if err != nil { + return "", err + } if i > 0 { rule.WriteString(` "," space`) } - rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, sc.formatLiteral(propName), propRuleName)) + rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName)) } rule.WriteString(` "}" space`) - return sc.addRule(ruleName, rule.String()) + return sc.addRule(ruleName, rule.String()), nil } else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists { - itemRuleName := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema) + itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema) + if err != nil { + return "", err + } rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName) - return sc.addRule(ruleName, rule) + return sc.addRule(ruleName, rule), nil } else { primitiveRule, exists := PRIMITIVE_RULES[schemaType] if !exists { @@ -306,7 +262,7 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, if ruleName == "root" { schemaType = "root" } - return sc.addRule(schemaType, primitiveRule) + return sc.addRule(schemaType, primitiveRule), nil } } func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} { @@ -332,47 +288,20 @@ func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[strin return def } -func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) string { +func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) { sc.addRule("freestring", PRIMITIVE_RULES["freestring"]) - sc.visit(schema, "", schema) - return sc.finalizeGrammar(options...) + _, err := sc.visit(schema, "", schema) + if err != nil { + return "", err + } + return sc.finalizeGrammar(options...), nil } -func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) string { +func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) { var schema map[string]interface{} - _ = json.Unmarshal(b, &schema) + err := json.Unmarshal(b, &schema) + if err != nil { + return "", err + } return sc.Grammar(schema, options...) 
} - -func jsonString(v interface{}) string { - b, _ := json.Marshal(v) - return string(b) -} - -type FunctionName struct { - Const string `json:"const"` -} - -type Argument struct { - Type string `json:"type"` - Properties map[string]interface{} `json:"properties"` -} - -type Item struct { - Type string `json:"type"` - Properties map[string]interface{} `json:"properties"` -} - -type JSONFunctionStructure struct { - OneOf []Item `json:"oneOf,omitempty"` - AnyOf []Item `json:"anyOf,omitempty"` - Defs map[string]interface{} `json:"$defs,omitempty"` -} - -func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string { - grammarOpts := &GrammarOption{} - grammarOpts.Apply(options...) - - dat, _ := json.Marshal(j) - return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...) -} diff --git a/pkg/functions/grammar_json_schema_test.go b/pkg/functions/grammar_json_schema_test.go index bf52bd8d..56c5fe1e 100644 --- a/pkg/functions/grammar_json_schema_test.go +++ b/pkg/functions/grammar_json_schema_test.go @@ -3,22 +3,11 @@ package functions_test import ( "strings" - "github.com/mudler/LocalAI/pkg/functions" . "github.com/mudler/LocalAI/pkg/functions" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) -func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { - property := map[string]interface{}{} - property[field1] = FunctionName{Const: name} - property[field2] = Argument{ - Type: "object", - Properties: properties, - } - return property -} - var testFunctions = []Item{ { Type: "object", @@ -245,7 +234,8 @@ root-1-name ::= "\"search\""` var _ = Describe("JSON schema grammar tests", func() { Context("JSON", func() { It("generates a valid grammar from JSON schema", func() { - grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1)) + grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput1)) + Expect(err).To(BeNil()) results := strings.Split(inputResult1, "\n") for _, r := range results { if r != "" { @@ -255,7 +245,8 @@ var _ = Describe("JSON schema grammar tests", func() { Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n")))) }) It("generates a valid grammar from JSON schema", func() { - grammar := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2)) + grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(testInput2)) + Expect(err).To(BeNil()) results := strings.Split(inputResult3, "\n") for _, r := range results { if r != "" { @@ -269,7 +260,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctions} - grammar := structuredGrammar.Grammar() + grammar, err := structuredGrammar.Grammar() + Expect(err).To(BeNil()) results := strings.Split(inputResult1, "\n") for _, r := range results { if r != "" { @@ -283,7 +275,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctions} - grammar := structuredGrammar.Grammar(functions.EnableMaybeArray) + grammar, err := structuredGrammar.Grammar(EnableMaybeArray) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ inputResult2, @@ -301,7 +294,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.EnableMaybeArray) + grammar, err := structuredGrammar.Grammar(EnableMaybeArray) + Expect(err).To(BeNil()) 
results := strings.Split( strings.Join([]string{ inputResult4, @@ -319,10 +313,11 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar( - functions.SetPrefix("suffix"), - functions.EnableMaybeArray, + grammar, err := structuredGrammar.Grammar( + SetPrefix("suffix"), + EnableMaybeArray, ) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`"suffix" arr | realvalue`), @@ -339,7 +334,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix")) + grammar, err := structuredGrammar.Grammar(SetPrefix("suffix")) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`"suffix" realvalue`), @@ -356,7 +352,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString) + grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`( "suffix" realvalue | mixedstring )`), @@ -373,7 +370,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.SetPrefix("suffix"), functions.EnableMaybeString, functions.EnableMaybeArray) + grammar, err := structuredGrammar.Grammar(SetPrefix("suffix"), EnableMaybeString, EnableMaybeArray) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`( "suffix" (arr | realvalue) | mixedstring )`), @@ -392,7 +390,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray) + grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`mixedstring | arr | realvalue`), @@ -410,7 +409,8 @@ var _ = Describe("JSON schema grammar tests", func() { structuredGrammar := JSONFunctionStructure{ OneOf: testFunctionsName} - grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.NoMixedFreeString) + grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, NoMixedFreeString) + Expect(err).To(BeNil()) results := strings.Split( strings.Join([]string{ rootResult(`freestring | arr | realvalue`), @@ -432,7 +432,8 @@ var _ = Describe("JSON schema grammar tests", func() { realvalue ("," realvalue)* )? 
"]"` - grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines) + grammar, err := structuredGrammar.Grammar(EnableMaybeString, EnableMaybeArray, DisableParallelNewLines) + Expect(err).To(BeNil()) results := strings.Split(content, "\n") for _, r := range results { if r != "" { diff --git a/pkg/functions/json_mode.go b/pkg/functions/json_mode.go new file mode 100644 index 00000000..46361b74 --- /dev/null +++ b/pkg/functions/json_mode.go @@ -0,0 +1,28 @@ +package functions + +const ( + JSONBNF = `root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +ws ::= ([ \t\n] ws)?` +) From 392cf1587795a5105281fa3fe13b7364f9bd3a5b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Jul 2024 12:22:09 +0200 Subject: [PATCH 60/69] models(gallery): add darkidol llama3.1 (#3008) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 870242f0..61d4313f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -53,6 +53,35 @@ - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7 uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" + icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png + urls: + - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored + - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF + description: | + The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. + + Saving money(LLama 3.1) + only test en. + Input Models input text only. Output Models generate text and code only. + Uncensored + Quick response + A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :) + DarkIdol:Roles that you can imagine and those that you cannot imagine. + Roleplay + Specialized in various role-playing scenarios + + How To + + System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script." 
+ overrides: + parameters: + model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + files: + - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f + uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf ## Deepseek - &deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 8bf4ccf3ede151623b7819ad26c482451ccbdb39 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Jul 2024 12:23:04 +0200 Subject: [PATCH 61/69] Update index.yaml Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 61d4313f..20a350d7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -53,7 +53,7 @@ - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7 uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf -- !!merge <<: *llama3 +- !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png urls: From d605df471cae57d90285e9ae93697be664808479 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Jul 2024 12:31:17 +0200 Subject: [PATCH 62/69] models(gallery): add gemmoy (#3009) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 20a350d7..b0f19347 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -847,6 +847,21 @@ - filename: EMO-2B.Q4_K_M.gguf sha256: 608bffc0e9012bc7f9a94b714f4932e2826cc122dbac59b586e4baa2ee0fdca5 uri: huggingface://RichardErkhov/OEvortex_-_EMO-2B-gguf/EMO-2B.Q4_K_M.gguf +- !!merge <<: *gemma + name: "gemmoy-9b-g2-mk.3-i1" + icon: https://huggingface.co/Hastagaras/G2-Gemmoy-9B-MK.3-RP/resolve/main/gemmoy.jpg + urls: + - https://huggingface.co/Hastagaras/Gemmoy-9B-G2-MK.3 + - https://huggingface.co/mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF + description: | + The Gemmoy-9B-G2-MK.3 model is a large language model trained on a variety of datasets, including grimulkan/LimaRP-augmented, LDJnr/Capybara, TheSkullery/C2logs_Filtered_Sharegpt_Merged, abacusai/SystemChat-1.1, and Hastagaras/FTTS-Stories-Sharegpt. 
+ overrides: + parameters: + model: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf + files: + - filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf + sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1 + uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 3379c3d98c405f303c0ac013a61eb99e05c41c74 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 25 Jul 2024 19:37:15 +0200 Subject: [PATCH 63/69] models(gallery): add stheno Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b0f19347..c8b361c3 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1070,6 +1070,36 @@ - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11 uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf +- !!merge <<: *llama3 + name: "l3-8b-stheno-horny-v3.3-32k-q5_k_m" + urls: + - https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K + - https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF + description: | + This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny. + + We took V3.3 Stheno weights from here + + And applied our lora at Alpha = 768 + + Thank you to Sao10K for the amazing model. + + This is not legal advice. I don't put any extra licensing on my own lora. + + LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0. + + LLaMA 3 license can be found here + + If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model. + + Again, not legal advice. 
+  overrides:
+    parameters:
+      model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
+    files:
+      - filename: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
+        sha256: 8d934f80ca6dbaa4852846108da92446a26715fbd5f6fc3859568850edf05262
+        uri: huggingface://Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF/l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf
 - !!merge <<: *llama3
   name: "llama-3-8b-openhermes-dpo"
   urls:

From 43f49533e829e0d16a8bbb56eccad28616a4705f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 25 Jul 2024 19:37:35 +0200
Subject: [PATCH 64/69] chore: add function calling template for llama 3.1
 models (#3010)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml             |  2 +-
 gallery/llama3.1-instruct.yaml | 62 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 gallery/llama3.1-instruct.yaml

diff --git a/gallery/index.yaml b/gallery/index.yaml
index c8b361c3..3648c2d8 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1,7 +1,7 @@
 ---
 ## LLama3.1
 - &llama31
-  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
+  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
   icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
   name: "meta-llama-3.1-8b-instruct"
   license: llama3.1
diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml
new file mode 100644
index 00000000..66c9ce97
--- /dev/null
+++ b/gallery/llama3.1-instruct.yaml
@@ -0,0 +1,62 @@
+---
+name: "llama3-instruct"
+
+config_file: |
+  mmap: true
+  function:
+    disable_no_action: true
+    grammar:
+      disable: true
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You have access to the following functions:
+
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+
+      Think very carefully before calling functions.
+      If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
+
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>

From ac37b471704ebe6cc32e10e7ae186dd75f5fb216 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Fri, 26 Jul 2024 00:07:10 +0200
Subject: [PATCH 65/69] chore: models(gallery): :arrow_up: update checksum
 (#3013)

:arrow_up: Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot]
 <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 67 +++++++++++++++++++++-------------------------
 1 file changed, 30 insertions(+), 37 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 3648c2d8..713eb21f 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -6,11 +6,11 @@
   name: "meta-llama-3.1-8b-instruct"
   license: llama3.1
   description: |
-      The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
+    The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models in 8B, 70B and 405B sizes (text in/text out). The Llama 3.1 instruction tuned text only models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.

-      Model developer: Meta
+    Model developer: Meta

-      Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
+    Model Architecture: Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
   urls:
     - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
     - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
@@ -60,21 +60,21 @@
     - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored
     - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF
   description: |
-      The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones.
+ The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. - Saving money(LLama 3.1) - only test en. - Input Models input text only. Output Models generate text and code only. - Uncensored - Quick response - A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :) - DarkIdol:Roles that you can imagine and those that you cannot imagine. - Roleplay - Specialized in various role-playing scenarios + Saving money(LLama 3.1) + only test en. + Input Models input text only. Output Models generate text and code only. + Uncensored + Quick response + A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :) + DarkIdol:Roles that you can imagine and those that you cannot imagine. + Roleplay + Specialized in various role-playing scenarios - How To + How To - System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script." + System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script." overrides: parameters: model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf @@ -82,8 +82,8 @@ - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf -## Deepseek - &deepseek + ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" name: "deepseek-coder-v2-lite-instruct" icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" @@ -434,12 +434,7 @@ - gpu - mistral - cpu - description: | - 🔬 Einstein-v4-7B - - This model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets. - - This model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl. + description: "\U0001F52C Einstein-v4-7B\n\nThis model is a full fine-tuned version of mistralai/Mistral-7B-v0.1 on diverse datasets.\n\nThis model is finetuned using 7xRTX3090 + 1xRTXA6000 using axolotl.\n" overrides: parameters: model: Einstein-v4-7B.Q4_K_M.gguf @@ -1076,23 +1071,23 @@ - https://huggingface.co/nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K - https://huggingface.co/Kurgan1138/L3-8B-Stheno-Horny-v3.3-32K-Q5_K_M-GGUF description: | - This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny. + This was an experiment to see if aligning other models via LORA is possible. Yes it is. We aligned it to be always horny. - We took V3.3 Stheno weights from here + We took V3.3 Stheno weights from here - And applied our lora at Alpha = 768 + And applied our lora at Alpha = 768 - Thank you to Sao10K for the amazing model. + Thank you to Sao10K for the amazing model. - This is not legal advice. I don't put any extra licensing on my own lora. + This is not legal advice. I don't put any extra licensing on my own lora. - LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0. + LLaMA 3 license may conflict with Creative Commons Attribution Non Commercial 4.0. 
- LLaMA 3 license can be found here + LLaMA 3 license can be found here - If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model. + If you want to host a model using our lora, you have our permission, but you might consider getting Sao's permission if you want to host their model. - Again, not legal advice. + Again, not legal advice. overrides: parameters: model: l3-8b-stheno-horny-v3.3-32k-q5_k_m.gguf @@ -3151,7 +3146,6 @@ - filename: ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf sha256: 71fef02915c606b438ccff2cae6b7760bbb54a558d5f2d39c2421d97b6682fea uri: huggingface://QuantFactory/ArliAI-Llama-3-8B-Dolfin-v0.5-GGUF/ArliAI-Llama-3-8B-Dolfin-v0.5.Q4_K_M.gguf - - !!merge <<: *llama3 name: "llama-3-ezo-8b-common-it" icon: https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it @@ -3159,11 +3153,11 @@ - https://huggingface.co/HODACHI/Llama-3-EZO-8b-Common-it - https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF description: | - Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3) + Based on meta-llama/Meta-Llama-3-8B-Instruct, it has been enhanced for Japanese usage through additional pre-training and instruction tuning. (Built with Meta Llama3) - This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page. + This model is based on Llama-3-8B-Instruct and is subject to the Llama-3 Terms of Use. For detailed information, please refer to the official Llama-3 license page. - このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。 + このモデルはLlama-3-8B-Instructをベースにしており、Llama-3の利用規約に従います。詳細については、Llama-3の公式ライセンスページをご参照ください。 overrides: parameters: model: Llama-3-EZO-8b-Common-it.Q4_K_M.iMatrix.gguf @@ -3292,7 +3286,6 @@ - filename: L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf sha256: ecbd57783006f1a027f8a7f5a5d551dc8b3568912825f566d79fd34a804e8970 uri: huggingface://mradermacher/L3-15B-MythicalMaid-t0.0001-GGUF/L3-15B-MythicalMaid-t0.0001.Q4_K_M.gguf - - !!merge <<: *llama3 name: "l3-15b-etherealmaid-t0.0001-i1" icon: https://cdn-uploads.huggingface.co/production/uploads/64f74b6e6389380c77562762/FwYXt2h_FdmlL0Z6qYufz.png @@ -3656,8 +3649,8 @@ model: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf files: - filename: Phi-3.1-mini-4k-instruct-Q4_K_M.gguf - sha256: 39458b227a4be763b7eb39d306d240c3d45205e3f8b474ec7bdca7bba0158e69 uri: huggingface://bartowski/Phi-3.1-mini-4k-instruct-GGUF/Phi-3.1-mini-4k-instruct-Q4_K_M.gguf + sha256: d6d25bf078321bea4a079c727b273cb0b5a2e0b4cf3add0f7a2c8e43075c414f - !!merge <<: *phi-3 name: "phillama-3.8b-v0.1" icon: https://cdn-uploads.huggingface.co/production/uploads/657eb5b256c9c67605a6e8b5/f96pPiJQb3puzbPYNknG2.png From 868182bc3881c67c86348422aafce3a1f60718ab Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 26 Jul 2024 00:28:34 +0200 Subject: [PATCH 66/69] chore: :arrow_up: Update ggerganov/llama.cpp (#3012) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f1862aef..c6028aa7 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ 
DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=68504f0970db5a3602d176953690f503059906b1 +CPPLLAMA_VERSION?=4226a8d10e3904db3a1297919fe6c7f06beba6c0 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From fee52942ebd4542572b86f402778d0a174e6bac2 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 26 Jul 2024 02:46:57 -0400 Subject: [PATCH 67/69] fix: PR title tag for checksum checker script workflow (#3014) * fix PR title tag for checksum checker script workflow Signed-off-by: Dave Lee * Update .github/workflows/checksum_checker.yaml Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Dave Lee Signed-off-by: Ettore Di Giacinto Co-authored-by: Ettore Di Giacinto --- .github/workflows/checksum_checker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml index b76b7aff..4f95a4e2 100644 --- a/.github/workflows/checksum_checker.yaml +++ b/.github/workflows/checksum_checker.yaml @@ -41,7 +41,7 @@ jobs: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI commit-message: ':arrow_up: Checksum updates in gallery/index.yaml' - title: 'models(gallery): :arrow_up: update checksum' + title: 'chore(model-gallery): :arrow_up: update checksum' branch: "update/checksum" body: Updating checksums in gallery/index.yaml signoff: true From 2169c3497d9b4fb4cfe13716844ea842728e0b11 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 26 Jul 2024 20:11:29 +0200 Subject: [PATCH 68/69] feat(grammar): add llama3.1 schema (#3015) * wip Signed-off-by: Ettore Di Giacinto * get rid of panics Signed-off-by: Ettore Di Giacinto * expose it properly from the config Signed-off-by: Ettore Di Giacinto * Simplify Signed-off-by: Ettore Di Giacinto * forgot to commit Signed-off-by: Ettore Di Giacinto * Remove focus on test Signed-off-by: Ettore Di Giacinto * Small fixups Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 4 +- pkg/functions/function_structure.go | 26 +- pkg/functions/functions.go | 8 - pkg/functions/functions_suite_test.go | 16 +- pkg/functions/{ => grammars}/bnf_rules.go | 15 +- pkg/functions/grammars/grammars_suite_test.go | 25 ++ .../json_schema.go} | 113 +------ .../json_schema_test.go} | 3 +- pkg/functions/grammars/llama31_schema.go | 281 ++++++++++++++++++ pkg/functions/grammars/llama31_schema_test.go | 76 +++++ pkg/functions/{ => grammars}/options.go | 17 +- pkg/functions/grammars/rules.go | 93 ++++++ pkg/functions/grammars/types.go | 33 ++ pkg/functions/parse.go | 47 ++- 14 files changed, 609 insertions(+), 148 deletions(-) rename pkg/functions/{ => grammars}/bnf_rules.go (85%) create mode 100644 pkg/functions/grammars/grammars_suite_test.go rename pkg/functions/{grammar_json_schema.go => grammars/json_schema.go} (67%) rename pkg/functions/{grammar_json_schema_test.go => grammars/json_schema_test.go} (99%) create mode 100644 pkg/functions/grammars/llama31_schema.go create mode 100644 pkg/functions/grammars/llama31_schema_test.go rename pkg/functions/{ => grammars}/options.go (76%) create mode 100644 pkg/functions/grammars/rules.go create mode 100644 pkg/functions/grammars/types.go diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index c7afb7bf..86b75601 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go 
@@ -226,12 +226,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup // Update input grammar jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey) - g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + g, err := jsStruct.Grammar(config.FunctionsConfig.GrammarOptions()...) if err == nil { config.Grammar = g } case input.JSONFunctionGrammarObject != nil: - g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...) + g, err := input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarOptions()...) if err == nil { config.Grammar = g } diff --git a/pkg/functions/function_structure.go b/pkg/functions/function_structure.go index 62cc68fa..c4337d67 100644 --- a/pkg/functions/function_structure.go +++ b/pkg/functions/function_structure.go @@ -1,6 +1,10 @@ package functions -import "encoding/json" +import ( + "encoding/json" + + "github.com/mudler/LocalAI/pkg/functions/grammars" +) type Item struct { Type string `json:"type"` @@ -13,13 +17,27 @@ type JSONFunctionStructure struct { Defs map[string]interface{} `json:"$defs,omitempty"` } -func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) (string, error) { - grammarOpts := &GrammarOption{} +func (j JSONFunctionStructure) Grammar(options ...func(*grammars.GrammarOption)) (string, error) { + grammarOpts := &grammars.GrammarOption{} grammarOpts.Apply(options...) dat, err := json.Marshal(j) if err != nil { return "", err } - return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...) + + converter := NewSchemaConverter(*grammarOpts) + return converter.GrammarFromBytes(dat, options...) +} + +type SchemaConverter interface { + GrammarFromBytes([]byte, ...func(*grammars.GrammarOption)) (string, error) +} + +func NewSchemaConverter(opt grammars.GrammarOption) SchemaConverter { + switch { + case opt.SchemaType == grammars.LLama31Schema: + return grammars.NewLLama31SchemaConverter(opt.FunctionName) + } + return grammars.NewJSONSchemaConverter(opt.PropOrder) } diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index 2690b8ec..19012d53 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -95,11 +95,3 @@ func (f Functions) Select(name string) Functions { return funcs } - -func jsonString(v interface{}) (string, error) { - b, err := json.Marshal(v) - if err != nil { - return "", err - } - return string(b), nil -} diff --git a/pkg/functions/functions_suite_test.go b/pkg/functions/functions_suite_test.go index 59a90ab0..ab743609 100644 --- a/pkg/functions/functions_suite_test.go +++ b/pkg/functions/functions_suite_test.go @@ -3,23 +3,11 @@ package functions_test import ( "testing" - . "github.com/mudler/LocalAI/pkg/functions" - . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) -func TestGrammar(t *testing.T) { +func TestFunctions(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Grammar test suite") -} - -func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { - property := map[string]interface{}{} - property[field1] = FunctionName{Const: name} - property[field2] = Argument{ - Type: "object", - Properties: properties, - } - return property + RunSpecs(t, "Functions test suite") } diff --git a/pkg/functions/bnf_rules.go b/pkg/functions/grammars/bnf_rules.go similarity index 85% rename from pkg/functions/bnf_rules.go rename to pkg/functions/grammars/bnf_rules.go index 13aa3654..469e187a 100644 --- a/pkg/functions/bnf_rules.go +++ b/pkg/functions/grammars/bnf_rules.go @@ -1,6 +1,9 @@ -package functions +package grammars -import "regexp" +import ( + "encoding/json" + "regexp" +) var ( PRIMITIVE_RULES = map[string]string{ @@ -45,3 +48,11 @@ const ( ("," realvalue)* )? "]"` ) + +func jsonString(v interface{}) (string, error) { + b, err := json.Marshal(v) + if err != nil { + return "", err + } + return string(b), nil +} diff --git a/pkg/functions/grammars/grammars_suite_test.go b/pkg/functions/grammars/grammars_suite_test.go new file mode 100644 index 00000000..5ac02bc1 --- /dev/null +++ b/pkg/functions/grammars/grammars_suite_test.go @@ -0,0 +1,25 @@ +package grammars_test + +import ( + "testing" + + . "github.com/mudler/LocalAI/pkg/functions" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestGrammar(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Grammar test suite") +} + +func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { + property := map[string]interface{}{} + property[field1] = FunctionName{Const: name} + property[field2] = Argument{ + Type: "object", + Properties: properties, + } + return property +} diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammars/json_schema.go similarity index 67% rename from pkg/functions/grammar_json_schema.go rename to pkg/functions/grammars/json_schema.go index 5ffc0ba5..df4ca6a1 100644 --- a/pkg/functions/grammar_json_schema.go +++ b/pkg/functions/grammars/json_schema.go @@ -1,4 +1,4 @@ -package functions +package grammars // a golang port of https://github.com/ggerganov/llama.cpp/pull/1887 @@ -7,13 +7,11 @@ import ( "fmt" "sort" "strings" - - "github.com/mudler/LocalAI/pkg/utils" ) type JSONSchemaConverter struct { propOrder map[string]int - rules map[string]string + rules Rules } func NewJSONSchemaConverter(propOrder string) *JSONSchemaConverter { @@ -60,90 +58,6 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string { return key } -func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string { - - grammarOpts := &GrammarOption{} - grammarOpts.Apply(options...) - - prefix := grammarOpts.Prefix - maybeArray := grammarOpts.MaybeArray - disableParallelNewLines := grammarOpts.DisableParallelNewLines - maybeString := grammarOpts.MaybeString - noMixedFreeString := grammarOpts.NoMixedFreeString - - var lines []string - - swapRoot := maybeArray || maybeString || prefix != "" - - // write down the computed rules. 
- // if maybeArray is true, we need to add the array rule and slightly tweak the root rule - for name, rule := range sc.rules { - if swapRoot && name == "root" { - name = "realvalue" - } - lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule)) - } - - if !swapRoot { - return strings.Join(lines, "\n") - } - - newRoot := "realvalue" - if maybeArray { - newRoot = "arr | realvalue" - } - - freestringRule := "mixedstring" - if noMixedFreeString { - freestringRule = "freestring" - } - - if prefix != "" { - // quote newlines in suffix - prefix = utils.EscapeNewLines(prefix) - - if maybeArray && maybeString { - newRoot = "(" + newRoot + ")" - } - - if maybeString { - //newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) " - newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) " - } else { - newRoot = "\"" + prefix + "\" " + "" + newRoot + "" - } - } else if maybeString { - if maybeArray { - // newRoot = "(" + newRoot + ")" - } - - newRoot = freestringRule + " | " + newRoot - } - - lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot)) - if disableParallelNewLines { - lines = append(lines, array) - } else { - lines = append(lines, arrayNewLines) - } - - if maybeArray { - if grammarOpts.ExpectStringsAfterJSON { - lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`) - } else { - lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) - } - } else { - if grammarOpts.ExpectStringsAfterJSON { - lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`) - } else { - lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) - } - } - - return strings.Join(lines, "\n") -} - func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rootSchema map[string]interface{}) (string, error) { st, existType := schema["type"] var schemaType string @@ -182,7 +96,10 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, rule := strings.Join(alternatives, " | ") return sc.addRule(ruleName, rule), nil } else if ref, exists := schema["$ref"].(string); exists { - referencedSchema := sc.resolveReference(ref, rootSchema) + referencedSchema, err := sc.resolveReference(ref, rootSchema) + if err != nil { + return "", err + } return sc.visit(referencedSchema, name, rootSchema) } else if constVal, exists := schema["const"]; exists { literal, err := sc.formatLiteral((constVal)) @@ -257,7 +174,7 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, } else { primitiveRule, exists := PRIMITIVE_RULES[schemaType] if !exists { - panic(fmt.Sprintf("Unrecognized schema: %v", schema)) + return "", fmt.Errorf("unrecognized schema: %v", schema) } if ruleName == "root" { schemaType = "root" @@ -265,27 +182,23 @@ func (sc *JSONSchemaConverter) visit(schema map[string]interface{}, name string, return sc.addRule(schemaType, primitiveRule), nil } } -func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) map[string]interface{} { +func (sc *JSONSchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) { if !strings.HasPrefix(ref, "#/$defs/") { - panic(fmt.Sprintf("Invalid reference format: %s", ref)) + return nil, fmt.Errorf("invalid reference format: %s", ref) } defKey := strings.TrimPrefix(ref, "#/$defs/") definitions, 
exists := rootSchema["$defs"].(map[string]interface{}) if !exists { - fmt.Println(rootSchema) - - panic("No definitions found in the schema") + return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema) } def, exists := definitions[defKey].(map[string]interface{}) if !exists { - fmt.Println(definitions) - - panic(fmt.Sprintf("Definition not found: %s", defKey)) + return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions) } - return def + return def, nil } func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) { @@ -294,7 +207,7 @@ func (sc *JSONSchemaConverter) Grammar(schema map[string]interface{}, options .. if err != nil { return "", err } - return sc.finalizeGrammar(options...), nil + return sc.rules.ToGrammar(options...), nil } func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) { diff --git a/pkg/functions/grammar_json_schema_test.go b/pkg/functions/grammars/json_schema_test.go similarity index 99% rename from pkg/functions/grammar_json_schema_test.go rename to pkg/functions/grammars/json_schema_test.go index 56c5fe1e..5fc4a602 100644 --- a/pkg/functions/grammar_json_schema_test.go +++ b/pkg/functions/grammars/json_schema_test.go @@ -1,9 +1,10 @@ -package functions_test +package grammars_test import ( "strings" . "github.com/mudler/LocalAI/pkg/functions" + . "github.com/mudler/LocalAI/pkg/functions/grammars" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) diff --git a/pkg/functions/grammars/llama31_schema.go b/pkg/functions/grammars/llama31_schema.go new file mode 100644 index 00000000..04b74aa5 --- /dev/null +++ b/pkg/functions/grammars/llama31_schema.go @@ -0,0 +1,281 @@ +package grammars + +import ( + "encoding/json" + "fmt" + "regexp" + "sort" + "strings" +) + +type LLama31SchemaConverter struct { + fnName string + rules Rules +} + +func NewLLama31SchemaConverter(fnName string) *LLama31SchemaConverter { + rules := make(map[string]string) + rules["space"] = SPACE_RULE + if fnName == "" { + fnName = "name" + } + + return &LLama31SchemaConverter{ + rules: rules, + fnName: fnName, + } +} + +var GRAMMAR_LITERAL_ESCAPESLlama = map[string]string{ + "\r": `\r`, + "\n": `\n`, +} + +var GRAMMAR_LITERAL_ESCAPE_RELlama = regexp.MustCompile(`[\r\n]`) + +func (sc *LLama31SchemaConverter) formatLiteral(literal interface{}) (string, error) { + jLiteral, err := jsonString(literal) + if err != nil { + return "", err + } + escaped := GRAMMAR_LITERAL_ESCAPE_RELlama.ReplaceAllStringFunc(jLiteral, func(match string) string { + return GRAMMAR_LITERAL_ESCAPESLlama[match] + }) + return escaped, nil +} + +func (sc *LLama31SchemaConverter) formatLiteralQuoted(literal interface{}) (string, error) { + jLiteral, err := jsonString(literal) + if err != nil { + return "", err + } + escaped := GRAMMAR_LITERAL_ESCAPE_RE.ReplaceAllStringFunc(jLiteral, func(match string) string { + return GRAMMAR_LITERAL_ESCAPES[match] + }) + return fmt.Sprintf(`"%s"`, escaped), nil +} + +func (sc *LLama31SchemaConverter) addRule(name, rule string) string { + escName := INVALID_RULE_CHARS_RE.ReplaceAllString(name, "-") + key := escName + if existingRule, ok := sc.rules[escName]; ok && existingRule != rule { + i := 0 + for { + key = fmt.Sprintf("%s%d", escName, i) + if _, ok := sc.rules[key]; !ok { + break + } + i++ + } + } + sc.rules[key] = rule + return key +} + +func (sc *LLama31SchemaConverter) visit(schema map[string]interface{}, name string, rootSchema 
map[string]interface{}) (string, error) {
+	st, existType := schema["type"]
+	var schemaType string
+	if existType {
+		schemaType = st.(string)
+	}
+	ruleName := name
+	if name == "" {
+		ruleName = "root"
+	}
+	_, oneOfExists := schema["oneOf"]
+	_, anyOfExists := schema["anyOf"]
+	if oneOfExists || anyOfExists {
+		var alternatives []string
+		oneOfSchemas, oneOfExists := schema["oneOf"].([]interface{})
+		anyOfSchemas, anyOfExists := schema["anyOf"].([]interface{})
+
+		if oneOfExists {
+			for i, altSchema := range oneOfSchemas {
+				alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				alternatives = append(alternatives, alternative)
+			}
+		} else if anyOfExists {
+			for i, altSchema := range anyOfSchemas {
+				alternative, err := sc.visit(altSchema.(map[string]interface{}), fmt.Sprintf("%s-%d", ruleName, i), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				alternatives = append(alternatives, alternative)
+			}
+		}
+
+		rule := strings.Join(alternatives, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if ref, exists := schema["$ref"].(string); exists {
+		referencedSchema, err := sc.resolveReference(ref, rootSchema)
+		if err != nil {
+			return "", err
+		}
+		return sc.visit(referencedSchema, name, rootSchema)
+	} else if constVal, exists := schema["const"]; exists {
+		literal, err := sc.formatLiteral(constVal)
+		if err != nil {
+			return "", err
+		}
+		return sc.addRule(ruleName, literal), nil
+	} else if enumVals, exists := schema["enum"].([]interface{}); exists {
+		var enumRules []string
+		for _, enumVal := range enumVals {
+			enumRule, err := sc.formatLiteralQuoted(enumVal)
+			if err != nil {
+				return "", err
+			}
+			enumRules = append(enumRules, enumRule)
+		}
+		rule := strings.Join(enumRules, " | ")
+		return sc.addRule(ruleName, rule), nil
+	} else if properties, exists := schema["properties"].(map[string]interface{}); schemaType == "object" && exists {
+		baseProperty := false
+		depth := strings.Split(name, "-")
+		if len(depth) == 2 {
+			baseProperty = true
+		}
+		type propData []struct {
+			propName   string
+			propSchema map[string]interface{}
+		}
+		var propPairs propData
+
+		for propName, propSchema := range properties {
+			propPairs = append(propPairs, struct {
+				propName   string
+				propSchema map[string]interface{}
+			}{propName: propName, propSchema: propSchema.(map[string]interface{})})
+		}
+
+		sort.Slice(propPairs, func(i, j int) bool {
+			return propPairs[i].propName < propPairs[j].propName
+		})
+
+		var rule strings.Builder
+		if baseProperty {
+			rule.WriteString(`"<function=" `)
+
+			// emit the function name property first, so that the rule follows
+			// the llama3.1 call syntax <function=NAME>ARGUMENTS</function>
+			for _, propPair := range propPairs {
+				if propPair.propName != sc.fnName {
+					continue
+				}
+				propRuleName, err := sc.visit(propPair.propSchema, fmt.Sprintf("%s-%s", ruleName, propPair.propName), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				rule.WriteString(fmt.Sprintf(`%s ">" `, propRuleName))
+			}
+
+			// then the remaining properties (the arguments object)
+			for _, propPair := range propPairs {
+				propName := propPair.propName
+				propSchema := propPair.propSchema
+				if propName == sc.fnName {
+					continue
+				}
+				propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
+				if err != nil {
+					return "", err
+				}
+
+				rule.WriteString(propRuleName)
+			}
+
+			rule.WriteString(` "</function>"`)
+
+		} else {
+			rule.WriteString(`"{" space`)
+			for i, propPair := range propPairs {
+				propName := propPair.propName
+				propSchema := propPair.propSchema
+				propRuleName, err := sc.visit(propSchema, fmt.Sprintf("%s-%s", ruleName, propName), rootSchema)
+				if err != nil {
+					return "", err
+				}
+				lPropName, err := sc.formatLiteralQuoted(propName)
+				if err != nil {
+					return "", err
+				}
+				if i > 0 {
+					rule.WriteString(` "," space`)
+				}
+
+				rule.WriteString(fmt.Sprintf(` %s space ":" space %s`, lPropName, propRuleName))
+			}
+		}
+
+		if !baseProperty {
+			rule.WriteString(` "}" space`)
+		}
+
+		return sc.addRule(ruleName, rule.String()), nil
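+		// a top-level tool-call object thus compiles to a rule such as:
+		//   root-0 ::= "<function=" root-0-function ">" root-0-arguments "</function>"
+		// while nested objects like the arguments keep the plain JSON shape:
+		//   root-0-arguments ::= "{" space "\"query\"" space ":" space string "}" space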
+	} else if items, exists := schema["items"].(map[string]interface{}); schemaType == "array" && exists {
+		itemRuleName, err := sc.visit(items, fmt.Sprintf("%s-item", ruleName), rootSchema)
+		if err != nil {
+			return "", err
+		}
+		rule := fmt.Sprintf(`"[" space (%s ("," space %s)*)? "]" space`, itemRuleName, itemRuleName)
+		return sc.addRule(ruleName, rule), nil
+	} else {
+		primitiveRule, exists := PRIMITIVE_RULES[schemaType]
+		if !exists {
+			return "", fmt.Errorf("unrecognized schema: %v", schema)
+		}
+		if ruleName == "root" {
+			schemaType = "root"
+		}
+		return sc.addRule(schemaType, primitiveRule), nil
+	}
+}
+func (sc *LLama31SchemaConverter) resolveReference(ref string, rootSchema map[string]interface{}) (map[string]interface{}, error) {
+	if !strings.HasPrefix(ref, "#/$defs/") {
+		return nil, fmt.Errorf("invalid reference format: %s", ref)
+	}
+
+	defKey := strings.TrimPrefix(ref, "#/$defs/")
+	definitions, exists := rootSchema["$defs"].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("no definitions found in the schema: %s", rootSchema)
+	}
+
+	def, exists := definitions[defKey].(map[string]interface{})
+	if !exists {
+		return nil, fmt.Errorf("definition not found: %s %+v", defKey, definitions)
+	}
+
+	return def, nil
+}
+
+func (sc *LLama31SchemaConverter) Grammar(schema map[string]interface{}, options ...func(*GrammarOption)) (string, error) {
+	sc.addRule("freestring", PRIMITIVE_RULES["freestring"])
+	_, err := sc.visit(schema, "", schema)
+	if err != nil {
+		return "", err
+	}
+	return sc.rules.ToGrammar(options...), nil
+}
+
+func (sc *LLama31SchemaConverter) GrammarFromBytes(b []byte, options ...func(*GrammarOption)) (string, error) {
+	var schema map[string]interface{}
+	err := json.Unmarshal(b, &schema)
+	if err != nil {
+		return "", err
+	}
+	return sc.Grammar(schema, options...)
+}
diff --git a/pkg/functions/grammars/llama31_schema_test.go b/pkg/functions/grammars/llama31_schema_test.go
new file mode 100644
index 00000000..84d09bd5
--- /dev/null
+++ b/pkg/functions/grammars/llama31_schema_test.go
@@ -0,0 +1,76 @@
+package grammars_test
+
+import (
+	"strings"
+
+	. "github.com/mudler/LocalAI/pkg/functions/grammars"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	testllama31Input1 = `
+	{
+		"oneOf": [
+			{
+				"type": "object",
+				"properties": {
+					"function": {"const": "create_event"},
+					"arguments": {
+						"type": "object",
+						"properties": {
+							"title": {"type": "string"},
+							"date": {"type": "string"},
+							"time": {"type": "string"}
+						}
+					}
+				}
+			},
+			{
+				"type": "object",
+				"properties": {
+					"function": {"const": "search"},
+					"arguments": {
+						"type": "object",
+						"properties": {
+							"query": {"type": "string"}
+						}
+					}
+				}
+			}
+		]
+	}`
+	// <function=example_function_name>{{"example_name": "example_value"}}</function>
+	testllama31inputResult1 = `root-0-function ::= "create_event"
+freestring ::= (
+		[^"\\] |
+		"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+  )* space
+root-0 ::= "<function=" root-0-function ">" root-0-arguments "</function>"
+root-1-arguments ::= "{" space "\"query\"" space ":" space string "}" space
+root ::= root-0 | root-1
+space ::= " "?
+root-0-arguments ::= "{" space "\"date\"" space ":" space string "," space "\"time\"" space ":" space string "," space "\"title\"" space ":" space string "}" space
+root-1 ::= "<function=" root-1-function ">" root-1-arguments "</function>"
+string ::= "\"" (
+	[^"\\] |
+	"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+)* "\"" space
+root-1-function ::= "search"`
+)
+
+var _ = Describe("JSON schema grammar tests", func() {
+	Context("JSON", func() {
+		It("generates a valid grammar from JSON schema", func() {
+			grammar, err := NewLLama31SchemaConverter("function").GrammarFromBytes([]byte(testllama31Input1))
+			Expect(err).ToNot(HaveOccurred())
+			results := strings.Split(testllama31inputResult1, "\n")
+			for _, r := range results {
+				if r != "" {
+					Expect(grammar).To(ContainSubstring(r))
+				}
+			}
+			Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))))
+		})
+	})
+})
diff --git a/pkg/functions/options.go b/pkg/functions/grammars/options.go
similarity index 76%
rename from pkg/functions/options.go
rename to pkg/functions/grammars/options.go
index 3a341a43..07c6c951 100644
--- a/pkg/functions/options.go
+++ b/pkg/functions/grammars/options.go
@@ -1,4 +1,4 @@
-package functions
+package grammars

 type GrammarOption struct {
 	PropOrder   string
@@ -8,6 +8,9 @@ type GrammarOption struct {
 	MaybeString             bool
 	NoMixedFreeString       bool
 	ExpectStringsAfterJSON  bool
+
+	FunctionName string
+	SchemaType   SchemaConverterType
 }

 func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@@ -48,3 +51,15 @@ func SetPropOrder(order string) func(*GrammarOption) {
 		o.PropOrder = order
 	}
 }
+
+func WithSchemaType(schemaType SchemaConverterType) func(*GrammarOption) {
+	return func(o *GrammarOption) {
+		o.SchemaType = schemaType
+	}
+}
+
+func WithFunctionName(name string) func(*GrammarOption) {
+	return func(o *GrammarOption) {
+		o.FunctionName = name
+	}
+}
diff --git a/pkg/functions/grammars/rules.go b/pkg/functions/grammars/rules.go
new file mode 100644
index 00000000..84fc8a25
--- /dev/null
+++ b/pkg/functions/grammars/rules.go
@@ -0,0 +1,93 @@
+package grammars
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/mudler/LocalAI/pkg/utils"
+)
+
+type Rules map[string]string
+
+func (rules Rules) ToGrammar(options ...func(*GrammarOption)) string {
+	grammarOpts := &GrammarOption{}
+	grammarOpts.Apply(options...)
+
+	prefix := grammarOpts.Prefix
+	maybeArray := grammarOpts.MaybeArray
+	disableParallelNewLines := grammarOpts.DisableParallelNewLines
+	maybeString := grammarOpts.MaybeString
+	noMixedFreeString := grammarOpts.NoMixedFreeString
+
+	var lines []string
+
+	swapRoot := maybeArray || maybeString || prefix != ""
+
+	// write down the computed rules.
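+	// e.g. with MaybeArray enabled the original root rule is kept under the
+	// name "realvalue" and the emitted root becomes: root ::= arr | realvalue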
+	// if maybeArray is true, we need to add the array rule and slightly tweak the root rule
+	for name, rule := range rules {
+		if swapRoot && name == "root" {
+			name = "realvalue"
+		}
+		lines = append(lines, fmt.Sprintf("%s ::= %s", name, rule))
+	}
+
+	if !swapRoot {
+		return strings.Join(lines, "\n")
+	}
+
+	newRoot := "realvalue"
+	if maybeArray {
+		newRoot = "arr | realvalue"
+	}
+
+	freestringRule := "mixedstring"
+	if noMixedFreeString {
+		freestringRule = "freestring"
+	}
+
+	if prefix != "" {
+		// quote newlines in suffix
+		prefix = utils.EscapeNewLines(prefix)
+
+		if maybeArray && maybeString {
+			newRoot = "(" + newRoot + ")"
+		}
+
+		if maybeString {
+			//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
+			newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
+		} else {
+			newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
+		}
+	} else if maybeString {
+		if maybeArray {
+			// newRoot = "(" + newRoot + ")"
+		}
+
+		newRoot = freestringRule + " | " + newRoot
+	}
+
+	lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
+	if disableParallelNewLines {
+		lines = append(lines, array)
+	} else {
+		lines = append(lines, arrayNewLines)
+	}
+
+	if maybeArray {
+		if grammarOpts.ExpectStringsAfterJSON {
+			lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
+		} else {
+			lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
+		}
+	} else {
+		if grammarOpts.ExpectStringsAfterJSON {
+			lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
+		} else {
+			lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
+		}
+	}
+
+	return strings.Join(lines, "\n")
+}
diff --git a/pkg/functions/grammars/types.go b/pkg/functions/grammars/types.go
new file mode 100644
index 00000000..1fe6444a
--- /dev/null
+++ b/pkg/functions/grammars/types.go
@@ -0,0 +1,33 @@
+package grammars
+
+type SchemaConverterType int
+
+const (
+	JSONSchema SchemaConverterType = iota
+	LLama31Schema
+)
+
+const (
+	LlamaType string = "llama3.1"
+	JSONType  string = "json"
+)
+
+func (s SchemaConverterType) String() string {
+	switch s {
+	case JSONSchema:
+		return JSONType
+	case LLama31Schema:
+		return LlamaType
+	}
+	return "unknown"
+}
+
+func NewType(t string) SchemaConverterType {
+	switch t {
+	case JSONType:
+		return JSONSchema
+	case LlamaType:
+		return LLama31Schema
+	}
+	return JSONSchema
+}
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 8e848a60..f5593690 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -7,6 +7,7 @@ import (
 	"regexp"
 	"strings"

+	"github.com/mudler/LocalAI/pkg/functions/grammars"
 	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 )
@@ -22,7 +23,9 @@ type GrammarConfig struct {
 	MixedMode bool `yaml:"mixed_mode"`

 	// NoMixedFreeString disables the mixed mode for free strings
-	// In this way if the LLM selects a free string, it won't be mixed necessarly with JSON objects
+	// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
+	// For example, if enabled the LLM either returns a JSON object or a free string, but not a mix of both.
+	// If disabled (default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but it is not going to be strict.
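+	// (e.g., assuming the usual function.grammar nesting of this config, a model
+	// YAML would set: grammar: { no_mixed_free_string: true })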
 	NoMixedFreeString bool `yaml:"no_mixed_free_string"`

 	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
@@ -39,6 +42,10 @@ type GrammarConfig struct {
 	// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
 	// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
 	PropOrder string `yaml:"properties_order"`
+
+	// SchemaType can be configured to use a specific schema type to force the grammar
+	// available: json, llama3.1
+	SchemaType string `yaml:"schema_type"`
 }

 // FunctionsConfig is the configuration for the tool/function call.
@@ -92,28 +99,36 @@ type FuncCallResults struct {
 	Arguments string
 }

-func (g GrammarConfig) Options() []func(o *GrammarOption) {
-	opts := []func(o *GrammarOption){}
-	if g.MixedMode {
-		opts = append(opts, EnableMaybeString)
+func (g FunctionsConfig) GrammarOptions() []func(o *grammars.GrammarOption) {
+	opts := []func(o *grammars.GrammarOption){}
+	if g.GrammarConfig.MixedMode {
+		opts = append(opts, grammars.EnableMaybeString)
 	}
-	if g.ParallelCalls {
-		opts = append(opts, EnableMaybeArray)
+	if g.GrammarConfig.ParallelCalls {
+		opts = append(opts, grammars.EnableMaybeArray)
 	}
-	if g.DisableParallelNewLines {
-		opts = append(opts, DisableParallelNewLines)
+	if g.GrammarConfig.DisableParallelNewLines {
+		opts = append(opts, grammars.DisableParallelNewLines)
 	}
-	if g.Prefix != "" {
-		opts = append(opts, SetPrefix(g.Prefix))
+	if g.GrammarConfig.Prefix != "" {
+		opts = append(opts, grammars.SetPrefix(g.GrammarConfig.Prefix))
 	}
-	if g.NoMixedFreeString {
-		opts = append(opts, NoMixedFreeString)
+	if g.GrammarConfig.NoMixedFreeString {
+		opts = append(opts, grammars.NoMixedFreeString)
 	}
-	if g.ExpectStringsAfterJSON {
-		opts = append(opts, ExpectStringsAfterJSON)
+	if g.GrammarConfig.ExpectStringsAfterJSON {
+		opts = append(opts, grammars.ExpectStringsAfterJSON)
 	}
-	opts = append(opts, SetPropOrder(g.PropOrder))
+	if g.GrammarConfig.SchemaType != "" {
+		opts = append(opts, grammars.WithSchemaType(grammars.NewType(g.GrammarConfig.SchemaType)))
+	}
+
+	if g.FunctionNameKey != "" {
+		opts = append(opts, grammars.WithFunctionName(g.FunctionNameKey))
+	}
+
+	opts = append(opts, grammars.SetPropOrder(g.GrammarConfig.PropOrder))
 	return opts
 }

From 80652abc9b17d18330f40e79d2d3541d249269ee Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Sat, 27 Jul 2024 01:26:28 +0200
Subject: [PATCH 69/69] chore: :arrow_up: Update ggerganov/llama.cpp (#3016)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot]
 <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c6028aa7..51893868 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true

 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=4226a8d10e3904db3a1297919fe6c7f06beba6c0
+CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
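Taken together, PATCH 64 and PATCH 68 wire the llama3.1 function-calling format end to end: the gallery template instructs the model to emit <function=name>{...}</function>, the template's response_regex extracts the name and arguments, and setting schema_type: llama3.1 in a model's grammar config selects the new converter so that constrained generation produces that same shape. Below is a minimal, self-contained sketch of driving the converter path directly; the schema literal is illustrative and not taken from the patches:

    package main

    import (
    	"fmt"

    	"github.com/mudler/LocalAI/pkg/functions"
    	"github.com/mudler/LocalAI/pkg/functions/grammars"
    )

    func main() {
    	// one illustrative tool: {"function": "search", "arguments": {"query": ...}}
    	js := functions.JSONFunctionStructure{
    		OneOf: []functions.Item{{
    			Type: "object",
    			Properties: map[string]interface{}{
    				"function": map[string]interface{}{"const": "search"},
    				"arguments": map[string]interface{}{
    					"type": "object",
    					"properties": map[string]interface{}{
    						"query": map[string]interface{}{"type": "string"},
    					},
    				},
    			},
    		}},
    	}

    	// WithSchemaType routes Grammar() through NewLLama31SchemaConverter,
    	// yielding rules of the form "<function=" NAME ">" ARGUMENTS "</function>"
    	g, err := js.Grammar(
    		grammars.WithSchemaType(grammars.NewType("llama3.1")),
    		grammars.WithFunctionName("function"),
    	)
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(g)
    }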