From 02d4eeffc840f9517c6ce479777e6716b2027640 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 10:24:42 +0200 Subject: [PATCH 001/235] models(gallery): add mistral-nemo (#3019) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 713eb21f..bedc05f9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -442,6 +442,21 @@ - filename: Einstein-v4-7B.Q4_K_M.gguf sha256: 78bd573de2a9eb3c6e213132858164e821145f374fcaa4b19dfd6502c05d990d uri: huggingface://mradermacher/Einstein-v4-7B-GGUF/Einstein-v4-7B.Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "mistral-nemo-instruct-2407" + urls: + - https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407 + - https://huggingface.co/bartowski/Mistral-Nemo-Instruct-2407-GGUF + - https://mistral.ai/news/mistral-nemo/ + description: | + The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407. Trained jointly by Mistral AI and NVIDIA, it significantly outperforms existing models smaller or similar in size. + overrides: + parameters: + model: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf + files: + - filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf + sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052 + uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From fe4c8c825170ddbffd4e88e4c6a53a7dc8c058ae Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 10:24:56 +0200 Subject: [PATCH 002/235] models(gallery): add llama3.1-8b-fireplace2 (#3018) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bedc05f9..c9d1e2b2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -82,6 +82,37 @@ - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama3.1-8b-fireplace2" + icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg + urls: + - https://huggingface.co/ValiantLabs/Llama3.1-8B-Fireplace2 + - https://huggingface.co/mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF + description: | + Fireplace 2 is a chat model, adding helpful structured outputs to Llama 3.1 8b Instruct. + + an expansion pack of supplementary outputs - request them at will within your chat: + Inline function calls + SQL queries + JSON objects + Data visualization with matplotlib + Mix normal chat and structured outputs within the same conversation. + Fireplace 2 supplements the existing strengths of Llama 3.1, providing inline capabilities within the Llama 3 Instruct format. + + Version + + This is the 2024-07-23 release of Fireplace 2 for Llama 3.1 8b. + + We're excited to bring further upgrades and releases to Fireplace 2 in the future. + + Help us and recommend Fireplace 2 to your friends! 
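Every `files` entry in these gallery additions pairs a download URI with a sha256 digest, which is what lets the installer verify the fetched GGUF before it is used. Below is a minimal, illustrative sketch of that verification step in Go — verifyChecksum is a hypothetical helper, not LocalAI's actual downloader; the digest shown is the one from the mistral-nemo-instruct-2407 entry above.

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// verifyChecksum compares a local file against the sha256 recorded in a
// gallery manifest entry. Illustrative helper, not LocalAI's own code.
func verifyChecksum(path, expected string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != expected {
		return fmt.Errorf("checksum mismatch: got %s, want %s", got, expected)
	}
	return nil
}

func main() {
	// Digest taken from the mistral-nemo-instruct-2407 entry above.
	err := verifyChecksum("Mistral-Nemo-Instruct-2407-Q4_K_M.gguf",
		"1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052")
	fmt.Println(err)
}

This is also why the automated checksum-update commits later in this series exist: when an upstream GGUF is re-uploaded, the recorded digest must follow.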
+ overrides: + parameters: + model: llama3.1-8b-fireplace2-q4_k_m.gguf + files: + - filename: llama3.1-8b-fireplace2-q4_k_m.gguf + sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e + uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 81c4b722582959295b3149e5df0ad564a3104901 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 10:28:47 +0200 Subject: [PATCH 003/235] models(gallery): add lumimaid-v0.2-12b (#3020) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index c9d1e2b2..7526befa 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -488,6 +488,31 @@ - filename: Mistral-Nemo-Instruct-2407-Q4_K_M.gguf sha256: 1a8b92fb546a80dce78151e4908f7bdb2c11fb3ef52af960e4bbe319a9cc5052 uri: huggingface://bartowski/Mistral-Nemo-Instruct-2407-GGUF/Mistral-Nemo-Instruct-2407-Q4_K_M.gguf +- !!merge <<: *mistral03 + name: "lumimaid-v0.2-12b" + icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/ep3ojmuMkFS-GmgRuI9iB.png + urls: + - https://huggingface.co/NeverSleep/Lumimaid-v0.2-12B + - https://huggingface.co/mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF + description: | + This model is based on: Mistral-Nemo-Instruct-2407 + + Wandb: https://wandb.ai/undis95/Lumi-Mistral-Nemo?nw=nwuserundis95 + + NOTE: As explained on Mistral-Nemo-Instruct-2407 repo, it's recommended to use a low temperature, please experiment! + + Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise. + + As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop. + + Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back! 
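The Lumimaid card above recommends running the model at a low temperature. Per-request sampling parameters can be passed through LocalAI's OpenAI-compatible chat endpoint; a hedged sketch in Go follows, where the host, port, and the value 0.3 are illustrative assumptions.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed local endpoint and settings; adjust to your deployment.
	body := []byte(`{
	  "model": "lumimaid-v0.2-12b",
	  "temperature": 0.3,
	  "messages": [{"role": "user", "content": "Hello!"}]
	}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}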
+ overrides: + parameters: + model: lumimaid-v0.2-12b-q4_k_m.gguf + files: + - filename: lumimaid-v0.2-12b-q4_k_m.gguf + sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf + uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From 7ef8edda32b1b5136ec48766ef0b5431177cf493 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 10:59:06 +0200 Subject: [PATCH 004/235] =?UTF-8?q?models(gallery):=20add=20darkidol-llama?= =?UTF-8?q?-3.1-8b-instruct-1.1-uncensored-iq=E2=80=A6=20(#3021)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit models(gallery): add darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7526befa..22855a2b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -113,6 +113,29 @@ - filename: llama3.1-8b-fireplace2-q4_k_m.gguf sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf +- !!merge <<: *llama31 + name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request" + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png + urls: + - https://huggingface.co/LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request + description: | + Uncensored + virtual idol Twitter + + https://x.com/aifeifei799 + + Questions + + The model's response results are for reference only, please do not fully trust them. + This model is solely for learning and testing purposes, and errors in output are inevitable. We do not take responsibility for the output results. If the output content is to be used, it must be modified; if not modified, we will assume it has been altered. + For commercial licensing, please refer to the Llama 3.1 agreement. 
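Gallery entries like the ones added in these patches are installed at runtime by POSTing the gallery ID to LocalAI's /models/apply endpoint, which returns a job that can then be polled. A minimal sketch in Go, assuming a local instance on port 8080 and the default localai gallery name:

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// The id follows the <gallery>@<model> convention used by the gallery.
	body := []byte(`{"id": "localai@darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request"}`)
	resp, err := http.Post("http://localhost:8080/models/apply",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	// The response carries a job uuid; progress can be polled at
	// /models/jobs/<uuid> until the download completes.
	fmt.Println(string(out))
}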
+ overrides: + parameters: + model: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf + files: + - filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf + sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6 + uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From d5a6c1e4f62619df77ebd9afa02fa9699ecaca47 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 11:00:21 +0200 Subject: [PATCH 005/235] models(gallery): add meta-llama-3.1-8b-instruct-abliterated (#3022) * models(gallery): add meta-llama-3.1-8b-instruct-abliterated Signed-off-by: Ettore Di Giacinto * Update gallery/index.yaml Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 22855a2b..edd9f5d8 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -113,6 +113,21 @@ - filename: llama3.1-8b-fireplace2-q4_k_m.gguf sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf +- !!merge <<: *llama31 + name: "meta-llama-3.1-8b-instruct-abliterated" + icon: https://i.imgur.com/KhorYYG.png + urls: + - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated + - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF + description: | + This is an uncensored version of Llama 3.1 8B Instruct created with abliteration. + overrides: + parameters: + model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf + files: + - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf + sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e + uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request" icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png From d59bcd539ed8def49957747aab338c9ea7c7aa86 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:18:55 +0200 Subject: [PATCH 006/235] models(gallery): add llama-3.1-70b-japanese-instruct-2407 (#3023) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index edd9f5d8..6a76241a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -128,6 +128,20 @@ - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1-70b-japanese-instruct-2407" + urls: + - https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407 + - https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf + description: | + The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. 
It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more. + overrides: + parameters: + model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf + files: + - filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf + sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604 + uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request" icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png From 7aa7f13095db2f1cad09ce9fa79edde47615dae5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:22:30 +0200 Subject: [PATCH 007/235] models(gallery): add llama-3.1-8b-instruct-fei-v1-uncensored (#3024) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6a76241a..bac51af7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -165,6 +165,22 @@ - filename: DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf sha256: fa9fc56de7d902b755c43f1a5d0867d961675174a1b3e73a10d822836c3390e6 uri: huggingface://LWDCLS/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-GGUF-IQ-Imatrix-Request/DarkIdol-Llama-3.1-8B-Instruct-1.1-Uncensored-Q4_K_M-imat.gguf +- !!merge <<: *llama31 + name: "llama-3.1-8b-instruct-fei-v1-uncensored" + icon: https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored/resolve/main/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.png + urls: + - https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored + - https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF + description: | + Llama-3.1-8B-Instruct Uncensored + more informtion look at Llama-3.1-8B-Instruct + overrides: + parameters: + model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf + files: + - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf + sha256: 12fef8ff0a5c4cf6988523d33d89287edb7531f0d1644707548f45f1387e398a + uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 7021c02d45385fc39ab828b7066919eccf88aec9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:24:45 +0200 Subject: [PATCH 008/235] models(gallery): add openbuddy-llama3.1-8b-v22.1-131k (#3025) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bac51af7..5b99bd72 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -181,6 +181,20 @@ - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf sha256: 12fef8ff0a5c4cf6988523d33d89287edb7531f0d1644707548f45f1387e398a uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "openbuddy-llama3.1-8b-v22.1-131k" + icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png + urls: + - 
https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF + description: | + OpenBuddy - Open Multilingual Chatbot + overrides: + parameters: + model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf + files: + - filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf + sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86 + uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From f9fad3f4ee31abdd98aebef88966e192cb930705 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:26:23 +0200 Subject: [PATCH 009/235] models: re-order Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 121 +++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5b99bd72..7b1e42ec 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -54,34 +54,48 @@ sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7 uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf - !!merge <<: *llama31 - name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" - icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png + name: "meta-llama-3.1-8b-instruct-abliterated" + icon: https://i.imgur.com/KhorYYG.png urls: - - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored - - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF + - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated + - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF description: | - The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. - - Saving money(LLama 3.1) - only test en. - Input Models input text only. Output Models generate text and code only. - Uncensored - Quick response - A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :) - DarkIdol:Roles that you can imagine and those that you cannot imagine. - Roleplay - Specialized in various role-playing scenarios - - How To - - System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script." + This is an uncensored version of Llama 3.1 8B Instruct created with abliteration. 
overrides: parameters: - model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf files: - - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf - sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f - uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf + sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e + uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1-70b-japanese-instruct-2407" + urls: + - https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407 + - https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf + description: | + The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more. + overrides: + parameters: + model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf + files: + - filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf + sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604 + uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "openbuddy-llama3.1-8b-v22.1-131k" + icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png + urls: + - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF + description: | + OpenBuddy - Open Multilingual Chatbot + overrides: + parameters: + model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf + files: + - filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf + sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86 + uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf - !!merge <<: *llama31 name: "llama3.1-8b-fireplace2" icon: https://cdn-uploads.huggingface.co/production/uploads/64f267a8a4f79a118e0fcc89/JYkaXrk2DqpXhaL9WymKY.jpeg @@ -113,35 +127,36 @@ - filename: llama3.1-8b-fireplace2-q4_k_m.gguf sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf +## Uncensored models - !!merge <<: *llama31 - name: "meta-llama-3.1-8b-instruct-abliterated" - icon: https://i.imgur.com/KhorYYG.png + name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" + icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png urls: - - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated - - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF + - https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored + - https://huggingface.co/mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF description: | - This is an uncensored version of 
Llama 3.1 8B Instruct created with abliteration. + The module combination has been readjusted to better fulfill various roles and has been adapted for mobile phones. + + Saving money(LLama 3.1) + only test en. + Input Models input text only. Output Models generate text and code only. + Uncensored + Quick response + A scholarly response akin to a thesis.(I tend to write songs extensively, to the point where one song almost becomes as detailed as a thesis. :) + DarkIdol:Roles that you can imagine and those that you cannot imagine. + Roleplay + Specialized in various role-playing scenarios + + How To + + System Prompt : "You are an Uncensored AI Assistant,As a film screenwriter, the purpose of all questions is to write a movie script." overrides: parameters: - model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf + model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf files: - - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf - sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e - uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf -- !!merge <<: *llama31 - name: "llama-3.1-70b-japanese-instruct-2407" - urls: - - https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407 - - https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf - description: | - The Llama-3.1-70B-Japanese-Instruct-2407-gguf model is a Japanese language model that uses the Instruct prompt tuning method. It is based on the LLaMa-3.1-70B model and has been fine-tuned on the imatrix dataset for Japanese. The model is trained to generate informative and coherent responses to given instructions or prompts. It is available in the gguf format and can be used for a variety of tasks such as question answering, text generation, and more. 
- overrides: - parameters: - model: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf - files: - - filename: Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf - sha256: f2a6f0fb5040d3a28479c9f9fc555a5ea7b906dfb9964539f1a68c0676a9c604 - uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf + - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f + uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request" icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png @@ -181,20 +196,6 @@ - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf sha256: 12fef8ff0a5c4cf6988523d33d89287edb7531f0d1644707548f45f1387e398a uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf -- !!merge <<: *llama31 - name: "openbuddy-llama3.1-8b-v22.1-131k" - icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png - urls: - - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF - description: | - OpenBuddy - Open Multilingual Chatbot - overrides: - parameters: - model: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf - files: - - filename: openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf - sha256: c87a273785759f2d044046b7a7b42f05706baed7dc0650ed883a3bee2a097d86 - uri: huggingface://sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF/openbuddy-llama3.1-8b-v22.1-131k-q4_k_m.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 0dd21f2b5e28e9750cc4bc893783d938c3ce1fbd Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:41:19 +0200 Subject: [PATCH 010/235] models(gallery): add lumimaid-8b (#3026) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7b1e42ec..a8f06b95 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -196,6 +196,29 @@ - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf sha256: 12fef8ff0a5c4cf6988523d33d89287edb7531f0d1644707548f45f1387e398a uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "lumimaid-v0.2-8b" + urls: + - https://huggingface.co/NeverSleep/Lumimaid-v0.2-8B + - https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF + icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png + description: | + This model is based on: Meta-Llama-3.1-8B-Instruct + + Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95 + + Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise. + + As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop. + + Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back! 
+ overrides: + parameters: + model: Lumimaid-v0.2-8B.Q4_K_M.gguf + files: + - filename: Lumimaid-v0.2-8B.Q4_K_M.gguf + sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85 + uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From fe0d092f58e6770c7d4e0d3ebb36680da16d7816 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 12:48:00 +0200 Subject: [PATCH 011/235] models(gallery): add llama3 with enforced functioncall with grammars (#3027) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 18 ++++++++ gallery/llama3.1-instruct-grammar.yaml | 64 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 gallery/llama3.1-instruct-grammar.yaml diff --git a/gallery/index.yaml b/gallery/index.yaml index a8f06b95..46ba1122 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -39,6 +39,24 @@ - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "meta-llama-3.1-8b-instruct:grammar-functioncall" + url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master" + urls: + - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct + - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF + description: | + This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled. + + When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment. + For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/. 
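Because this entry forces tool output through a BNF grammar, a standard OpenAI-style tools request is enough to get schema-valid calls back. A sketch in Go — the endpoint and the get_weather tool definition are assumptions for illustration; the model name is the gallery entry defined here.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed local endpoint; the tool schema is an illustrative example.
	body := []byte(`{
	  "model": "meta-llama-3.1-8b-instruct:grammar-functioncall",
	  "messages": [{"role": "user", "content": "What is the weather in Rome?"}],
	  "tools": [{
	    "type": "function",
	    "function": {
	      "name": "get_weather",
	      "description": "Get the current weather for a city",
	      "parameters": {
	        "type": "object",
	        "properties": {"city": {"type": "string"}},
	        "required": ["city"]
	      }
	    }
	  }]
	}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // tool_calls come back constrained by the grammar
}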
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+    files:
+      - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+        sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
+        uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
 - !!merge <<: *llama31
   name: "meta-llama-3.1-8b-claude-imat"
   urls:
diff --git a/gallery/llama3.1-instruct-grammar.yaml b/gallery/llama3.1-instruct-grammar.yaml
new file mode 100644
index 00000000..f75eaaf4
--- /dev/null
+++ b/gallery/llama3.1-instruct-grammar.yaml
@@ -0,0 +1,64 @@
+---
+name: "llama3-instruct-grammar"
+
+config_file: |
+  mmap: true
+  function:
+    disable_no_action: true
+    grammar:
+      no_mixed_free_string: true
+      mixed_mode: true
+    schema_type: llama3.1 # or JSON is supported too (json)
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You have access to the following functions:
+
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+
+      Think very carefully before calling functions.
+      If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
+
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>

From 82cc81974f53eb233a122bd114ef75e5f6422e0b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 27 Jul 2024 15:29:50 +0200
Subject: [PATCH 012/235] Update llama3.1-instruct.yaml

Signed-off-by: Ettore Di Giacinto
---
 gallery/llama3.1-instruct.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery/llama3.1-instruct.yaml b/gallery/llama3.1-instruct.yaml
index 66c9ce97..4a2b4db1 100644
--- a/gallery/llama3.1-instruct.yaml
+++ b/gallery/llama3.1-instruct.yaml
@@ -49,7 +49,7 @@ config_file: |
       {{.Input }}
       <|start_header_id|>assistant<|end_header_id|>
     chat: |
-      <|begin_of_text|>{{.Input }}
+      {{.Input }}
       <|start_header_id|>assistant<|end_header_id|>
     completion: |
       {{.Input}}

From 0a7e4c1b935e61241b219e8dc4e4f62269b08293 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 27 Jul 2024 15:30:01 +0200
Subject: [PATCH 013/235] Update llama3.1-instruct-grammar.yaml

Signed-off-by: Ettore Di Giacinto
---
 gallery/llama3.1-instruct-grammar.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gallery/llama3.1-instruct-grammar.yaml b/gallery/llama3.1-instruct-grammar.yaml
index f75eaaf4..30237af3 100644
---
a/gallery/llama3.1-instruct-grammar.yaml +++ b/gallery/llama3.1-instruct-grammar.yaml @@ -51,7 +51,7 @@ config_file: | {{.Input }} <|start_header_id|>assistant<|end_header_id|> chat: | - <|begin_of_text|>{{.Input }} + {{.Input }} <|start_header_id|>assistant<|end_header_id|> completion: | {{.Input}} From d57acefed46dc5ba88625e8680dc56243a6fe8f7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 27 Jul 2024 15:30:13 +0200 Subject: [PATCH 014/235] Update llama3-instruct.yaml Signed-off-by: Ettore Di Giacinto --- gallery/llama3-instruct.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/llama3-instruct.yaml b/gallery/llama3-instruct.yaml index 3eed758b..5dc54b0e 100644 --- a/gallery/llama3-instruct.yaml +++ b/gallery/llama3-instruct.yaml @@ -31,7 +31,7 @@ config_file: | {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> Function call: chat: | - <|begin_of_text|>{{.Input }} + {{.Input }} <|start_header_id|>assistant<|end_header_id|> completion: | {{.Input}} From b1f93935bebe3162419fc58982ca0a0436ec680b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Jul 2024 23:49:13 +0200 Subject: [PATCH 015/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3030) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51893868..a1a9494a 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7 +CPPLLAMA_VERSION?=5e2727fe0321c38d1664d26173c654fa1801dc5f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 610e1c00c61154db3df5ac2bcea8e77165326048 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 27 Jul 2024 23:52:57 +0200 Subject: [PATCH 016/235] chore: :arrow_up: Update ggerganov/whisper.cpp (#3029) :arrow_up: Update ggerganov/whisper.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a1a9494a..2df7c225 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp -WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3 +WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248 # bert.cpp version BERT_REPO?=https://github.com/go-skynet/go-bert.cpp From 2a839e143254f6874a180dda5fdde88746a79bd8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Jul 2024 10:27:56 +0200 Subject: [PATCH 017/235] fix(gallery): do not attempt to delete duplicate files (#3031) Signed-off-by: Ettore Di Giacinto --- core/gallery/gallery.go | 31 +++++++++++++++---------------- pkg/utils/strings.go | 12 ++++++++++++ 2 files changed, 27 insertions(+), 16 deletions(-) diff 
--git a/core/gallery/gallery.go b/core/gallery/gallery.go index d102eac8..9288c44f 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -204,35 +204,34 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) } + var filesToRemove []string + // Remove additional files if galleryconfig != nil { for _, f := range galleryconfig.Files { fullPath := filepath.Join(basePath, f.Filename) - log.Debug().Msgf("Removing file %s", fullPath) - if e := os.Remove(fullPath); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e)) - } + filesToRemove = append(filesToRemove, fullPath) } } for _, f := range additionalFiles { fullPath := filepath.Join(filepath.Join(basePath, f)) - log.Debug().Msgf("Removing additional file %s", fullPath) - if e := os.Remove(fullPath); e != nil { + filesToRemove = append(filesToRemove, fullPath) + } + + filesToRemove = append(filesToRemove, configFile) + filesToRemove = append(filesToRemove, galleryFile) + + // skip duplicates + filesToRemove = utils.Unique(filesToRemove) + + // Removing files + for _, f := range filesToRemove { + if e := os.Remove(f); e != nil { err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) } } - log.Debug().Msgf("Removing model config file %s", configFile) - - // Delete the model config file - if e := os.Remove(configFile); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e)) - } - - // Delete gallery config file - os.Remove(galleryFile) - return err } diff --git a/pkg/utils/strings.go b/pkg/utils/strings.go index 2a782e03..4ac0458d 100644 --- a/pkg/utils/strings.go +++ b/pkg/utils/strings.go @@ -18,3 +18,15 @@ func RandString(n int) string { } return string(b) } + +func Unique(arr []string) []string { + unique := make(map[string]bool) + var result []string + for _, item := range arr { + if _, ok := unique[item]; !ok { + unique[item] = true + result = append(result, item) + } + } + return result +} From d6a7a77f6b6be947280d20e090ca270d3fcae724 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 28 Jul 2024 10:28:10 +0200 Subject: [PATCH 018/235] fix(gallery): do clear out errors once displayed (#3033) Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 12 ++++++---- core/http/routes/ui.go | 45 +++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 3b3741d8..575ea87d 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -9,7 +9,6 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/xsync" ) const ( @@ -372,7 +371,12 @@ func dropBadChars(s string) string { return strings.ReplaceAll(s, "@", "__") } -func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string { +type ProcessTracker interface { + Exists(string) bool + Get(string) string +} + +func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { modelsElements := []elem.Node{} descriptionDiv := func(m *gallery.GalleryModel) elem.Node { return elem.Div( @@ -396,7 +400,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri actionDiv := 
func(m *gallery.GalleryModel) elem.Node { galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) - currentlyProcessing := processing.Exists(galleryID) + currentlyProcessing := processTracker.Exists(galleryID) jobID := "" isDeletionOp := false if currentlyProcessing { @@ -404,7 +408,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri if status != nil && status.Deletion { isDeletionOp = true } - jobID = processing.Get(galleryID) + jobID = processTracker.Get(galleryID) // TODO: // case not handled, if status == nil : "Waiting" } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 33706944..92917463 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -21,6 +21,40 @@ import ( "github.com/google/uuid" ) +type modelOpCache struct { + status *xsync.SyncedMap[string, string] +} + +func NewModelOpCache() *modelOpCache { + return &modelOpCache{ + status: xsync.NewSyncedMap[string, string](), + } +} + +func (m *modelOpCache) Set(key string, value string) { + m.status.Set(key, value) +} + +func (m *modelOpCache) Get(key string) string { + return m.status.Get(key) +} + +func (m *modelOpCache) DeleteUUID(uuid string) { + for _, k := range m.status.Keys() { + if m.status.Get(k) == uuid { + m.status.Delete(k) + } + } +} + +func (m *modelOpCache) Map() map[string]string { + return m.status.Map() +} + +func (m *modelOpCache) Exists(key string) bool { + return m.status.Exists(key) +} + func RegisterUIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, @@ -29,7 +63,7 @@ func RegisterUIRoutes(app *fiber.App, auth func(*fiber.Ctx) error) { // keeps the state of models that are being installed from the UI - var processingModels = xsync.NewSyncedMap[string, string]() + var processingModels = NewModelOpCache() // modelStatus returns the current status of the models being processed (installation or deletion) // it is called asynchonously from the UI @@ -232,6 +266,8 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.ProgressBar("100")) } if status.Error != nil { + // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable + processingModels.DeleteUUID(jobUID) return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) } @@ -246,12 +282,7 @@ func RegisterUIRoutes(app *fiber.App, status := galleryService.GetStatus(jobUID) galleryID := "" - for _, k := range processingModels.Keys() { - if processingModels.Get(k) == jobUID { - galleryID = k - processingModels.Delete(k) - } - } + processingModels.DeleteUUID(jobUID) if galleryID == "" { log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) } From d4a3872dd9850331896c75c9ca3a2e96b5d52c95 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jul 2024 12:46:18 +0200 Subject: [PATCH 019/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3034) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2df7c225..a3d908cf 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=5e2727fe0321c38d1664d26173c654fa1801dc5f 
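The DeleteModelFromSystem change above collects every candidate path first and de-duplicates the list with utils.Unique, so a file referenced by both the model config and the gallery entry is only removed once instead of producing a spurious second-remove error. A standalone sketch of the same pattern (the helper is re-implemented here for illustration):

package main

import (
	"fmt"
	"os"
)

// unique mirrors pkg/utils.Unique: first occurrence wins, order preserved.
func unique(arr []string) []string {
	seen := make(map[string]bool)
	var result []string
	for _, item := range arr {
		if !seen[item] {
			seen[item] = true
			result = append(result, item)
		}
	}
	return result
}

func main() {
	// The same file can be listed by both the model config and the gallery
	// manifest; de-duplicating avoids a second os.Remove that would fail.
	files := []string{"model.gguf", "model.yaml", "model.gguf"}
	for _, f := range unique(files) {
		if err := os.Remove(f); err != nil {
			fmt.Println("remove:", err)
		}
	}
}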
+CPPLLAMA_VERSION?=4730faca618ff9cee0780580145e3cbe86f24876 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 86f8d5b50acd8fe88af4f537be0d42472772b928 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jul 2024 13:11:23 +0200 Subject: [PATCH 020/235] chore(model-gallery): :arrow_up: update checksum (#3036) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 46ba1122..b6216ede 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -205,15 +205,15 @@ - https://huggingface.co/aifeifei799/Llama-3.1-8B-Instruct-Fei-v1-Uncensored - https://huggingface.co/mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF description: | - Llama-3.1-8B-Instruct Uncensored - more informtion look at Llama-3.1-8B-Instruct + Llama-3.1-8B-Instruct Uncensored + more informtion look at Llama-3.1-8B-Instruct overrides: parameters: model: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf files: - filename: Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf - sha256: 12fef8ff0a5c4cf6988523d33d89287edb7531f0d1644707548f45f1387e398a uri: huggingface://mradermacher/Llama-3.1-8B-Instruct-Fei-v1-Uncensored-GGUF/Llama-3.1-8B-Instruct-Fei-v1-Uncensored.Q4_K_M.gguf + sha256: 6b1985616160712eb884c34132dc0602fa4600a19075e3a7b179119b89b73f77 - !!merge <<: *llama31 name: "lumimaid-v0.2-8b" urls: @@ -221,15 +221,15 @@ - https://huggingface.co/mradermacher/Lumimaid-v0.2-8B-GGUF icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/TUcHg7LKNjfo0sni88Ps7.png description: | - This model is based on: Meta-Llama-3.1-8B-Instruct + This model is based on: Meta-Llama-3.1-8B-Instruct - Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95 + Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95 - Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise. + Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise. - As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop. + As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop. - Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back! + Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back! 
overrides: parameters: model: Lumimaid-v0.2-8B.Q4_K_M.gguf From 5d08b9ac68f04431165d94ef9a3ec42b31718bad Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jul 2024 23:47:02 +0200 Subject: [PATCH 021/235] docs: :arrow_up: update docs version mudler/LocalAI (#3039) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index efda370f..94160f08 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.19.2" + "version": "v2.19.3" } From 3a70cf311b3c5e2a54351da99adce7fdb27f8f84 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 28 Jul 2024 23:53:00 +0200 Subject: [PATCH 022/235] chore(model-gallery): :arrow_up: update checksum (#3040) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index b6216ede..25ac7e64 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -84,8 +84,8 @@ model: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf files: - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf - sha256: 18cca47adfb3954af2b49e3aa2ce1604158337aff45fab2e7654039b65c7683e uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf + sha256: 2e1fd6d93b19cc6548b2b8ed2d3f1f34b432ee0573f3dcf358bbaab4f23c760b - !!merge <<: *llama31 name: "llama-3.1-70b-japanese-instruct-2407" urls: @@ -173,8 +173,8 @@ model: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf files: - filename: DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf - sha256: 6730efc0628c7534189487b52ed5a358a0a2c3ecb062824eccc8e0444eaa212f uri: huggingface://mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-i1-GGUF/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.i1-Q4_K_M.gguf + sha256: 9632316d735365087f36083dec320a71995650deb86cf74f39ab071e43114eb8 - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.1-uncensored-iq-imatrix-request" icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/iDV5GTVJbjkvMp1set-ZC.png From 7c4e5268539c913b454003ce478599c10a7bc0bc Mon Sep 17 00:00:00 2001 From: Dave Date: Sun, 28 Jul 2024 19:19:36 -0400 Subject: [PATCH 023/235] fix: install.sh bash specific equality check (#3038) fix == to = for sh portability Signed-off-by: Dave Lee --- docs/static/install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/static/install.sh b/docs/static/install.sh index 3209b24e..8d928750 100644 --- a/docs/static/install.sh +++ b/docs/static/install.sh @@ -194,7 +194,7 @@ install_container_toolkit_yum() { curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \ $SUDO tee /etc/yum.repos.d/nvidia-container-toolkit.repo - if [ "$PACKAGE_MANAGER" == "dnf" ]; then + if [ "$PACKAGE_MANAGER" = "dnf" ]; then $SUDO $PACKAGE_MANAGER config-manager --enable nvidia-container-toolkit-experimental else $SUDO 
$PACKAGE_MANAGER -y install yum-utils @@ -629,7 +629,7 @@ case "$ARCH" in *) fatal "Unsupported architecture: $ARCH" ;; esac -if [ "$OS" == "Darwin" ]; then +if [ "$OS" = "Darwin" ]; then install_binary_darwin exit 0 fi From cb042713e88023e9823cc0ed147cb0700868614b Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 29 Jul 2024 09:39:48 +0200 Subject: [PATCH 024/235] chore(model-gallery): :arrow_up: update checksum (#3043) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 25ac7e64..923107bd 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -69,8 +69,8 @@ model: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf files: - filename: Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf - sha256: 8de80021b9438f0925a41ae73f77cb73fcfa30090e03a0919ce23d2b9818e9c7 uri: huggingface://InferenceIllusionist/Meta-Llama-3.1-8B-Claude-iMat-GGUF/Meta-Llama-3.1-8B-Claude-iMat-Q4_K_M.gguf + sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff - !!merge <<: *llama31 name: "meta-llama-3.1-8b-instruct-abliterated" icon: https://i.imgur.com/KhorYYG.png From e7df875db36605f1ec2f6f3c0517b2890b49bc09 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Jul 2024 10:17:49 +0200 Subject: [PATCH 025/235] models(gallery): add magnum-32b-v1 (#3044) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 923107bd..2b7cef4e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -515,6 +515,21 @@ - filename: StellarDong-72b.i1-Q4_K_M.gguf sha256: 4c5012f0a034f40a044904891343ade2594f29c28a8a9d8052916de4dc5a61df uri: huggingface://mradermacher/StellarDong-72b-i1-GGUF/StellarDong-72b.i1-Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "magnum-32b-v1-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/635567189c72a7e742f1419c/PK7xRSd18Du0bX-w_t-9c.png + urls: + - https://huggingface.co/anthracite-org/magnum-32b-v1 + - https://huggingface.co/mradermacher/magnum-32b-v1-i1-GGUF + description: | + This is the second in a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet and Opus. This model is fine-tuned on top of Qwen1.5 32B. 
+ overrides: + parameters: + model: magnum-32b-v1.i1-Q4_K_M.gguf + files: + - filename: magnum-32b-v1.i1-Q4_K_M.gguf + sha256: a31704ce0d7e5b774f155522b9ab7ef6015a4ece4e9056bf4dfc6cac561ff0a3 + uri: huggingface://mradermacher/magnum-32b-v1-i1-GGUF/magnum-32b-v1.i1-Q4_K_M.gguf - &mistral03 ## START Mistral url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master" From 8a39707b367063663bbb58675f6bc1a0e0d1234c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Jul 2024 16:44:48 +0200 Subject: [PATCH 026/235] models(gallery): add lumimaid-v0.2-70b-i1 (#3045) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2b7cef4e..31848f2a 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -237,6 +237,29 @@ - filename: Lumimaid-v0.2-8B.Q4_K_M.gguf sha256: c8024fcb49c71410903d0d076a1048249fa48b31637bac5177bf5c3f3d603d85 uri: huggingface://mradermacher/Lumimaid-v0.2-8B-GGUF/Lumimaid-v0.2-8B.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "lumimaid-v0.2-70b-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/HY1KTq6FMAm-CwmY8-ndO.png + urls: + - https://huggingface.co/NeverSleep/Lumimaid-v0.2-70B + - https://huggingface.co/mradermacher/Lumimaid-v0.2-70B-i1-GGUF + description: | + This model is based on: Meta-Llama-3.1-8B-Instruct + + Wandb: https://wandb.ai/undis95/Lumi-Llama-3-1-8B?nw=nwuserundis95 + + Lumimaid 0.1 -> 0.2 is a HUGE step up dataset wise. + + As some people have told us our models are sloppy, Ikari decided to say fuck it and literally nuke all chats out with most slop. + + Our dataset stayed the same since day one, we added data over time, cleaned them, and repeat. After not releasing model for a while because we were never satisfied, we think it's time to come back! 
+ overrides: + parameters: + model: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf + files: + - filename: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf + sha256: 4857da8685cb0f3d2b8b8c91fb0c07b35b863eb7c185e93ed83ac338e095cbb5 + uri: huggingface://mradermacher/Lumimaid-v0.2-70B-i1-GGUF/Lumimaid-v0.2-70B.i1-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 6f8d6f601abfd203c405952f8d25fac192163615 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Jul 2024 16:45:00 +0200 Subject: [PATCH 027/235] models(gallery): add sekhmet_aleph-l3.1-8b-v0.1-i1 (#3046) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 31848f2a..b6b8aba4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -145,6 +145,19 @@ - filename: llama3.1-8b-fireplace2-q4_k_m.gguf sha256: 54527fd2474b576086ea31e759214ab240abe2429ae623a02d7ba825cc8cb13e uri: huggingface://mudler/Llama3.1-8B-Fireplace2-Q4_K_M-GGUF/llama3.1-8b-fireplace2-q4_k_m.gguf +- !!merge <<: *llama31 + name: "sekhmet_aleph-l3.1-8b-v0.1-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/SVyiW4mu495ngqszJGWRl.png + urls: + - https://huggingface.co/Nitral-Archive/Sekhmet_Aleph-L3.1-8B-v0.1 + - https://huggingface.co/mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF + overrides: + parameters: + model: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf + files: + - filename: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf + sha256: 5b6f4eaa2091bf13a2b563a54a3f87b22efa7f2862362537c956c70da6e11cea + uri: huggingface://mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF/Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 4700c9df929ba41f6a3c1c171d561064e155b50b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 29 Jul 2024 20:15:53 +0200 Subject: [PATCH 028/235] models(gallery): add l3.1-8b-llamoutcast-i1 (#3047) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b6b8aba4..fb61defe 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -158,6 +158,26 @@ - filename: Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf sha256: 5b6f4eaa2091bf13a2b563a54a3f87b22efa7f2862362537c956c70da6e11cea uri: huggingface://mradermacher/Sekhmet_Aleph-L3.1-8B-v0.1-i1-GGUF/Sekhmet_Aleph-L3.1-8B-v0.1.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "l3.1-8b-llamoutcast-i1" + icon: https://files.catbox.moe/ecgn0m.jpg + urls: + - https://huggingface.co/Envoid/L3.1-8B-Llamoutcast + - https://huggingface.co/mradermacher/L3.1-8B-Llamoutcast-i1-GGUF + description: | + Warning: this model is utterly cursed. + Llamoutcast + + This model was originally intended to be a DADA finetune of Llama-3.1-8B-Instruct but the results were unsatisfactory. So it received some additional finetuning on a rawtext dataset and now it is utterly cursed. + + It responds to Llama-3 Instruct formatting. 
+ overrides: + parameters: + model: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf + files: + - filename: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf + sha256: 438ca0a7e9470f5ee40f3b14dc2da41b1cafc4ad4315dead3eb57924109d5cf6 + uri: huggingface://mradermacher/L3.1-8B-Llamoutcast-i1-GGUF/L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From e5f91fbba2a08c286a4746bbd981433edaabeb47 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:28:38 +0000 Subject: [PATCH 029/235] chore(deps): Bump langchain from 0.2.10 to 0.2.11 in /examples/langchain/langchainpy-localai-example (#3053) chore(deps): Bump langchain Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.10 to 0.2.11. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.10...langchain==0.2.11) --- updated-dependencies: - dependency-name: langchain dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 0e03d543..66a1b70f 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -10,7 +10,7 @@ debugpy==1.8.2 frozenlist==1.4.1 greenlet==3.0.3 idna==3.7 -langchain==0.2.10 +langchain==0.2.11 langchain-community==0.2.9 marshmallow==3.21.3 marshmallow-enum==1.5.1 From 3dfed64a1569a6905468d2145493ebc7d37d7ddd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:29:08 +0000 Subject: [PATCH 030/235] chore(deps): Bump openai from 1.37.0 to 1.37.1 in /examples/langchain/langchainpy-localai-example (#3051) chore(deps): Bump openai Bumps [openai](https://github.com/openai/openai-python) from 1.37.0 to 1.37.1. - [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.37.0...v1.37.1) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 66a1b70f..f29cb78a 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -18,7 +18,7 @@ multidict==6.0.5 mypy-extensions==1.0.0 numexpr==2.10.1 numpy==2.0.1 -openai==1.37.0 +openai==1.37.1 openapi-schema-pydantic==1.2.4 packaging>=23.2 pydantic==2.8.2 From 40604e877c9e9ec4a4c99a4f92cc7b8bd3fb4b49 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:45:52 +0000 Subject: [PATCH 031/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/autogptq (#3048) chore(deps): Bump setuptools in /backend/python/autogptq Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/autogptq/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index 635b4c31..755e19d8 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 5c747a16c4dddfff5687eeaf3464a2bdd232eea1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 00:43:12 +0000 Subject: [PATCH 032/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vllm (#3061) chore(deps): Bump setuptools in /backend/python/vllm Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/vllm/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 635b4c31..755e19d8 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 0da042dc2b6d5f855e7958859c1bbd979afef6d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 01:11:05 +0000 Subject: [PATCH 033/235] chore(deps): Bump chromadb from 0.5.4 to 0.5.5 in /examples/langchain-chroma (#3060) chore(deps): Bump chromadb in /examples/langchain-chroma Bumps [chromadb](https://github.com/chroma-core/chroma) from 0.5.4 to 0.5.5. - [Release notes](https://github.com/chroma-core/chroma/releases) - [Changelog](https://github.com/chroma-core/chroma/blob/main/RELEASE_PROCESS.md) - [Commits](https://github.com/chroma-core/chroma/compare/0.5.4...0.5.5) --- updated-dependencies: - dependency-name: chromadb dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 89ca2db7..50d6dc4f 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.10 openai==1.37.0 -chromadb==0.5.4 +chromadb==0.5.5 llama-index==0.10.56 \ No newline at end of file From 9948ff27157cc1403b5a26de448e6ece68132d91 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 01:21:56 +0000 Subject: [PATCH 034/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/parler-tts (#3062) chore(deps): Bump setuptools in /backend/python/parler-tts Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/parler-tts/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 5c4aa6a5..58a2a1dd 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 0dd02b2ad77d5c80e6090b4dc0e42fac14352d9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 02:15:53 +0000 Subject: [PATCH 035/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/rerankers (#3067) chore(deps): Bump setuptools in /backend/python/rerankers Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/rerankers/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index 635b4c31..755e19d8 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From f822bebfd8b55d4c12d0200805fe610f8823bb5d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 02:29:39 +0000 Subject: [PATCH 036/235] chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers-musicgen (#3066) chore(deps): Bump setuptools in /backend/python/transformers-musicgen Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 95d4848c..755e19d8 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 45233937b74d0891f34cc82206a227c5ded1db2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 03:06:11 +0000 Subject: [PATCH 037/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/coqui (#3068) chore(deps): Bump setuptools in /backend/python/coqui Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/coqui/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 5c4aa6a5..58a2a1dd 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 9c96a73d9355aaa636f7b5c21f7eef16587ec24f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 03:27:00 +0000 Subject: [PATCH 038/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vall-e-x (#3069) chore(deps): Bump setuptools in /backend/python/vall-e-x Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/vall-e-x/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 5c4aa6a5..58a2a1dd 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From f24fac43da5d0926c1eed88806d0bce270cd2771 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 03:58:11 +0000 Subject: [PATCH 039/235] chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/petals (#3070) chore(deps): Bump setuptools in /backend/python/petals Bumps [setuptools](https://github.com/pypa/setuptools) from 70.3.0 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v70.3.0...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/petals/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt index 635b4c31..755e19d8 100644 --- a/backend/python/petals/requirements-intel.txt +++ b/backend/python/petals/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From 3feb8690250b6d0c958df41c0532eff1918f0c1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 04:02:15 +0000 Subject: [PATCH 040/235] chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers (#3071) chore(deps): Bump setuptools in /backend/python/transformers Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 72.1.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v72.1.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- backend/python/transformers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 55925b32..29d4f55e 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -6,4 +6,4 @@ torch certifi intel-extension-for-transformers bitsandbytes -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 From 198bc6d939c3175be1f589a80c5d92d5244dff17 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 04:39:56 +0000 Subject: [PATCH 041/235] chore(deps): Bump streamlit from 1.36.0 to 1.37.0 in /examples/streamlit-bot (#3072) chore(deps): Bump streamlit in /examples/streamlit-bot Bumps [streamlit](https://github.com/streamlit/streamlit) from 1.36.0 to 1.37.0. - [Release notes](https://github.com/streamlit/streamlit/releases) - [Commits](https://github.com/streamlit/streamlit/compare/1.36.0...1.37.0) --- updated-dependencies: - dependency-name: streamlit dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/streamlit-bot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt index ed2a5980..63291928 100644 --- a/examples/streamlit-bot/requirements.txt +++ b/examples/streamlit-bot/requirements.txt @@ -1,2 +1,2 @@ -streamlit==1.36.0 +streamlit==1.37.0 requests \ No newline at end of file From 12b470f00ae5a9c74ac167fae42260745f645916 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Jul 2024 07:28:14 +0200 Subject: [PATCH 042/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3075) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a3d908cf..f939f715 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4730faca618ff9cee0780580145e3cbe86f24876 +CPPLLAMA_VERSION?=75af08c475e285888f66556d0f459c533b7deb95 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From d50c72a657be23e574f26ecfb8f9fb7e470ef6e1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 09:20:57 +0200 Subject: [PATCH 043/235] Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers-musicgen" (#3077) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python…" This reverts commit f822bebfd8b55d4c12d0200805fe610f8823bb5d. 
--- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 755e19d8..95d4848c 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From a7dbeb36ca0810009f28b342a7b53566796d0252 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 09:21:09 +0200 Subject: [PATCH 044/235] Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python/transformers" (#3078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "chore(deps): Bump setuptools from 69.5.1 to 72.1.0 in /backend/python…" This reverts commit 3feb8690250b6d0c958df41c0532eff1918f0c1f. --- backend/python/transformers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 29d4f55e..55925b32 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -6,4 +6,4 @@ torch certifi intel-extension-for-transformers bitsandbytes -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 From f1e90575f333b17bb5644e2402ab2bc970e0312a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 09:21:45 +0200 Subject: [PATCH 045/235] Revert "chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python/vllm" (#3079) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "chore(deps): Bump setuptools from 70.3.0 to 72.1.0 in /backend/python…" This reverts commit 5c747a16c4dddfff5687eeaf3464a2bdd232eea1. 
--- backend/python/vllm/requirements-intel.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 755e19d8..635b4c31 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From abcbbbed2d83b1edc086fa05b8634e4f35e22918 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 10:04:47 +0200 Subject: [PATCH 046/235] models(gallery): add l3.1-8b-celeste-v1.5 (#3080) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index fb61defe..1fe7b6ee 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -293,6 +293,22 @@ - filename: Lumimaid-v0.2-70B.i1-Q4_K_M.gguf sha256: 4857da8685cb0f3d2b8b8c91fb0c07b35b863eb7c185e93ed83ac338e095cbb5 uri: huggingface://mradermacher/Lumimaid-v0.2-70B-i1-GGUF/Lumimaid-v0.2-70B.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "l3.1-8b-celeste-v1.5" + icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/QcU3xEgVu18jeFtMFxIw-.webp + urls: + - https://huggingface.co/nothingiisreal/L3.1-8B-Celeste-V1.5 + - https://huggingface.co/bartowski/L3.1-8B-Celeste-V1.5-GGUF + description: | + The LLM model is a large language model trained on a combination of datasets including nothingiisreal/c2-logs-cleaned, kalomaze/Opus_Instruct_25k, and nothingiisreal/Reddit-Dirty-And-WritingPrompts. The training was performed on a combination of English-language data using the Hugging Face Transformers library. + Trained on LLaMA 3.1 8B Instruct at 8K context using a new mix of Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned This version has the highest coherency and is very strong on OOC: instruct following. + overrides: + parameters: + model: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf + files: + - filename: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf + sha256: a408dfbbd91ed5561f70d3129af040dfd06704d6c7fa21146aa9f09714aafbc6 + uri: huggingface://bartowski/L3.1-8B-Celeste-V1.5-GGUF/L3.1-8B-Celeste-V1.5-Q4_K_M.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 2d59c99d31a422a55a7e95cc64e96614372fef20 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 12:07:52 +0200 Subject: [PATCH 047/235] models(gallery): add llama-guard-3-8b (#3082) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1fe7b6ee..d9f9e5b7 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -178,6 +178,22 @@ - filename: L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf sha256: 438ca0a7e9470f5ee40f3b14dc2da41b1cafc4ad4315dead3eb57924109d5cf6 uri: huggingface://mradermacher/L3.1-8B-Llamoutcast-i1-GGUF/L3.1-8B-Llamoutcast.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-guard-3-8b" + urls: + - https://huggingface.co/meta-llama/Llama-Guard-3-8B + - https://huggingface.co/QuantFactory/Llama-Guard-3-8B-GGUF + description: | + Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. 
Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated. + + Llama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls. + overrides: + parameters: + model: Llama-Guard-3-8B.Q4_K_M.gguf + files: + - filename: Llama-Guard-3-8B.Q4_K_M.gguf + sha256: c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981 + uri: huggingface://QuantFactory/Llama-Guard-3-8B-GGUF/Llama-Guard-3-8B.Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 17634b394b5c2222586265d47a29d1bac929f39b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 12:12:55 +0200 Subject: [PATCH 048/235] models(gallery): add meta-llama-3-instruct-8.9b-brainstorm-5x-form-11 (#3083) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d9f9e5b7..71cfc20b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -3705,6 +3705,19 @@ - filename: calme-2.4-llama3-70b.Q4_K_M.gguf sha256: 0b44ac8a88395dfc60f1b9d3cfffc0ffef74ec0a302e610ef91fc787187568f2 uri: huggingface://mradermacher/calme-2.4-llama3-70b-GGUF/calme-2.4-llama3-70b.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "meta-llama-3-instruct-8.9b-brainstorm-5x-form-11" + urls: + - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF + description: | + Meta-Llama-3-8B Instruct (now at 8.9B) is an enhanced version of the LLM model, specifically designed for creative use cases such as story writing, roleplaying, and fiction. This model has been augmented through the "Brainstorm" process, which involves expanding and calibrating the reasoning center of the LLM to improve its performance in various creative tasks. The enhancements brought by this process include more detailed and nuanced descriptions, stronger prose, and a greater sense of immersion in the story. The model is capable of generating long and vivid content, with fewer clichés and more focused, coherent narratives. Users can provide more instructions and details to elicit stronger and more engaging responses from the model. The "Brainstorm" process has been tested on multiple LLM models, including Llama2, Llama3, and Mistral, as well as on individual models like Llama3 Instruct, Mistral Instruct, and custom fine-tuned models. 
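
On the files: blocks used throughout these entries: the sha256 value is what LocalAI verifies the download against, and it can also be checked by hand. A sketch for the Llama Guard file added above, under the assumption (not stated in the patch) that the huggingface:// URI maps to the standard resolve/main download URL:

    # fetch the quantized model and confirm it matches the checksum in the gallery entry
    wget https://huggingface.co/QuantFactory/Llama-Guard-3-8B-GGUF/resolve/main/Llama-Guard-3-8B.Q4_K_M.gguf
    echo "c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981  Llama-Guard-3-8B.Q4_K_M.gguf" | sha256sum -c -
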
+ overrides: + parameters: + model: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf + files: + - filename: Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf + sha256: 5dd81b8b809667d10036499affdd1461cf95af50b405cbc9f800b421a4b60e98 + uri: huggingface://DavidAU/Meta-Llama-3-Instruct-8.9B-BRAINSTORM-5x-FORM-11-GGUF/Meta-Llama-3-8B-Instruct-exp5-11-Q4_K_M.gguf - &command-R ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" From 274487c5eb2d6ae36c4e2f077ed2f5b4e94a2c48 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 15:04:13 +0200 Subject: [PATCH 049/235] fix(llama-cpp): do not compress with UPX (#3084) Fixes: https://github.com/mudler/LocalAI/issues/3041 Signed-off-by: Ettore Di Giacinto --- Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Makefile b/Makefile index f939f715..92b1fbdc 100644 --- a/Makefile +++ b/Makefile @@ -783,9 +783,6 @@ else echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined." LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server endif -ifneq ($(UPX),) - $(UPX) backend/cpp/${VARIANT}/grpc-server -endif # This target is for manually building a variant with-auto detected flags backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp @@ -858,9 +855,6 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama. backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc mkdir -p backend-assets/util/ cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server -ifneq ($(UPX),) - $(UPX) backend-assets/util/llama-cpp-rpc-server -endif backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ From 57ea7f81bb1d0749696e1423f65a96bae7b5ef86 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 30 Jul 2024 17:06:22 +0200 Subject: [PATCH 050/235] fix(ci): update openvoice checkpoints URLs (#3085) Signed-off-by: Ettore Di Giacinto --- backend/python/openvoice/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh index 218c0dcd..6c0a840f 100755 --- a/backend/python/openvoice/test.sh +++ b/backend/python/openvoice/test.sh @@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh # Download checkpoints if not present if [ ! 
-d "checkpoints_v2" ]; then - wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip + wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip unzip checkpoints_v2.zip fi From 9b21f0d6ad91269de23239fc090f8f5a88367dd7 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 30 Jul 2024 23:55:24 +0200 Subject: [PATCH 051/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3086) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 92b1fbdc..607389f1 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=75af08c475e285888f66556d0f459c533b7deb95 +CPPLLAMA_VERSION?=7e72aa74fd676a093eb9970e761085ec22734c71 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 98ffc00926afc440951082e37d87f8377c6996f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 09:17:10 +0200 Subject: [PATCH 052/235] models(gallery): add sunfall-simpo (#3088) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 71cfc20b..71673ec9 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1155,6 +1155,32 @@ - filename: Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf sha256: 0d1004a246fbda7f1408a6841129b73c4100e697bd0a6806fc698eabbb0802a1 uri: huggingface://mradermacher/Gemmoy-9B-G2-MK.3-i1-GGUF/Gemmoy-9B-G2-MK.3.i1-Q4_K_M.gguf +- !!merge <<: *gemma + name: "sunfall-simpo-9b" + urls: + - https://huggingface.co/mradermacher/sunfall-SimPO-9B-GGUF + description: | + Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe. + overrides: + parameters: + model: sunfall-SimPO-9B.Q4_K_M.gguf + files: + - filename: sunfall-SimPO-9B.Q4_K_M.gguf + sha256: 810c51c6ce34107706d921531b97cfa409cd53c215d18b88bce7cdb617f73ceb + uri: huggingface://mradermacher/sunfall-SimPO-9B-GGUF/sunfall-SimPO-9B.Q4_K_M.gguf +- !!merge <<: *gemma + name: "sunfall-simpo-9b-i1" + urls: + - https://huggingface.co/mradermacher/sunfall-SimPO-9B-i1-GGUF + description: | + Crazy idea that what if you put the LoRA from crestf411/sunfall-peft on top of princeton-nlp/gemma-2-9b-it-SimPO and therefore this exists solely for that purpose alone in the universe. 
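
Once installed, an entry such as sunfall-simpo-9b is queried through LocalAI's OpenAI-compatible API under the same name: field. A minimal sketch, assuming the server listens on the default port 8080; the prompt is only a placeholder:

    curl http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{
            "model": "sunfall-simpo-9b",
            "messages": [{"role": "user", "content": "Introduce yourself in character."}]
          }'
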
+ overrides: + parameters: + model: sunfall-SimPO-9B.i1-Q4_K_M.gguf + files: + - filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf + sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b + uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 2775edb3f0941e4f2886a7f48bc3df422b7b17b6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 09:21:24 +0200 Subject: [PATCH 053/235] models(gallery): add genius-llama3.1-i1 (#3089) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 71673ec9..a8df3d97 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -194,6 +194,21 @@ - filename: Llama-Guard-3-8B.Q4_K_M.gguf sha256: c5ea8760a1e544eea66a8915fcc3fbd2c67357ea2ee6871a9e6a6c33b64d4981 uri: huggingface://QuantFactory/Llama-Guard-3-8B-GGUF/Llama-Guard-3-8B.Q4_K_M.gguf +- !!merge <<: *llama31 + name: "genius-llama3.1-i1" + icon: https://github.com/fangyuan-ksgk/GeniusUpload/assets/66006349/7272c93e-9806-461c-a3d0-2e50ef2b7af0 + urls: + - https://huggingface.co/Ksgk-fy/Genius-Llama3.1 + - https://huggingface.co/mradermacher/Genius-Llama3.1-i1-GGUF + description: | + Finetuned Llama-3.1 base on Lex Fridman's podcast transcript. + overrides: + parameters: + model: Genius-Llama3.1.i1-Q4_K_M.gguf + files: + - filename: Genius-Llama3.1.i1-Q4_K_M.gguf + sha256: a272bb2a6ab7ed565738733fb8af8e345b177eba9e76ce615ea845c25ebf8cd5 + uri: huggingface://mradermacher/Genius-Llama3.1-i1-GGUF/Genius-Llama3.1.i1-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 92faf5fd1dbbc59cbb481355d08fb220738ed6f1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 09:25:48 +0200 Subject: [PATCH 054/235] models(gallery): add seeker-9b (#3090) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a8df3d97..e88a16f6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1196,6 +1196,21 @@ - filename: sunfall-SimPO-9B.i1-Q4_K_M.gguf sha256: edde9df372a9a5b2316dc6822dc2f52f5a2059103dd7f08072e5a5355c5f5d0b uri: huggingface://mradermacher/sunfall-SimPO-9B-i1-GGUF/sunfall-SimPO-9B.i1-Q4_K_M.gguf +- !!merge <<: *gemma + name: "seeker-9b" + icon: https://huggingface.co/lodrick-the-lafted/seeker-9b/resolve/main/seeker.webp + urls: + - https://huggingface.co/lodrick-the-lafted/seeker-9b + - https://huggingface.co/mradermacher/seeker-9b-GGUF + description: | + The LLM model is the "Seeker-9b" model, which is a large language model trained on a diverse range of text data. It has 9 billion parameters and is based on the "lodrick-the-lafted" repository. The model is capable of generating text and can be used for a variety of natural language processing tasks such as language translation, text summarization, and text generation. It supports the English language and is available under the Apache-2.0 license. 
+  overrides:
+    parameters:
+      model: seeker-9b.Q4_K_M.gguf
+    files:
+      - filename: seeker-9b.Q4_K_M.gguf
+        sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a
+        uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf
 - &llama3
   url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
   icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png

From 8845524d01e13dd50a6ef7506def3a61b8007e06 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 31 Jul 2024 09:36:17 +0200
Subject: [PATCH 055/235] models(gallery): add llama3.1-chinese-chat (#3091)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index e88a16f6..28439637 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -209,6 +209,35 @@
 - filename: Genius-Llama3.1.i1-Q4_K_M.gguf
   sha256: a272bb2a6ab7ed565738733fb8af8e345b177eba9e76ce615ea845c25ebf8cd5
   uri: huggingface://mradermacher/Genius-Llama3.1-i1-GGUF/Genius-Llama3.1.i1-Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-8b-chinese-chat"
+  urls:
+    - https://huggingface.co/shenzhi-wang/Llama3.1-8B-Chinese-Chat
+    - https://huggingface.co/QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF
+  description: |
+    llama3.1-8B-Chinese-Chat is an instruction-tuned language model for Chinese & English users with various abilities such as roleplaying & tool-using built upon the Meta-Llama-3.1-8B-Instruct model. Developers: [Shenzhi Wang](https://shenzhi-wang.netlify.app)*, [Yaowei Zheng](https://github.com/hiyouga)*, Guoyin Wang (in.ai), Shiji Song, Gao Huang. (*: Equal Contribution) - License: [Llama-3.1 License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE) - Base Model: Meta-Llama-3.1-8B-Instruct - Model Size: 8.03B - Context length: 128K (reported by [Meta-Llama-3.1-8B-Instruct model](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), untested for our Chinese model)
+  overrides:
+    parameters:
+      model: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
+    files:
+      - filename: Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
+        sha256: 824847b6cca82c4d60107c6a059d80ba975a68543e6effd98880435436ddba06
+        uri: huggingface://QuantFactory/Llama3.1-8B-Chinese-Chat-GGUF/Llama3.1-8B-Chinese-Chat.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "llama3.1-70b-chinese-chat"
+  urls:
+    - https://huggingface.co/shenzhi-wang/Llama3.1-70B-Chinese-Chat
+    - https://huggingface.co/mradermacher/Llama3.1-70B-Chinese-Chat-GGUF
+  description: |
+    "Llama3.1-70B-Chinese-Chat" is a 70-billion parameter large language model pre-trained on a large corpus of Chinese text data. It is designed for chat and dialog applications, and can generate human-like responses to various prompts and inputs. The model is based on the Llama3.1 architecture and has been fine-tuned for Chinese language understanding and generation. It can be used for a wide range of natural language processing tasks, including language translation, text summarization, question answering, and more.
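
Gallery entries can also be installed through LocalAI's HTTP gallery API instead of the CLI. A sketch for the entry above; the id prefix assumes the default index is registered under the gallery name "localai", which is configuration-dependent:

    curl http://localhost:8080/models/apply \
      -H "Content-Type: application/json" \
      -d '{"id": "localai@llama3.1-70b-chinese-chat"}'
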
+ overrides: + parameters: + model: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf + files: + - filename: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf + sha256: 395cff3cce2b092f840b68eb6e31f4c8b670bc8e3854bbb230df8334369e671d + uri: huggingface://mradermacher/Llama3.1-70B-Chinese-Chat-GGUF/Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 33bc1e8b190cd5dd135c0f6e6f184e4fb233cc02 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 10:38:02 +0200 Subject: [PATCH 056/235] models(gallery): add gemmasutra-pro-27b-v1 (#3092) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 28439637..6e2aae21 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1240,6 +1240,21 @@ - filename: seeker-9b.Q4_K_M.gguf sha256: 7658e5bdad96dc8d232f83cff7c3fe5fa993defbfd3e728dcc7436352574a00a uri: huggingface://mradermacher/seeker-9b-GGUF/seeker-9b.Q4_K_M.gguf +- !!merge <<: *gemma + name: "gemmasutra-pro-27b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp + urls: + - https://huggingface.co/TheDrummer/Gemmasutra-Pro-27B-v1 + - https://huggingface.co/mradermacher/Gemmasutra-Pro-27B-v1-GGUF + description: | + An RP model with impressive flexibility. Finetuned by yours truly. + overrides: + parameters: + model: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf + files: + - filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf + sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218 + uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 476705708879467014440167d39d99cccdd2d4d3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 10:43:45 +0200 Subject: [PATCH 057/235] models(gallery): add leetwizard (#3093) Signed-off-by: Ettore Di Giacinto --- gallery/alpaca.yaml | 17 +++++++++++++++++ gallery/index.yaml | 22 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 gallery/alpaca.yaml diff --git a/gallery/alpaca.yaml b/gallery/alpaca.yaml new file mode 100644 index 00000000..b647d2f6 --- /dev/null +++ b/gallery/alpaca.yaml @@ -0,0 +1,17 @@ +--- +name: "alpaca" + +config_file: | + context_size: 4096 + f16: true + mmap: true + template: + chat: | + Below is an instruction that describes a task. Write a response that appropriately completes the request. 
+ + ### Instruction: + {{.Input}} + + ### Response: + completion: | + {{.Input}} diff --git a/gallery/index.yaml b/gallery/index.yaml index 6e2aae21..66ab4216 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4386,6 +4386,28 @@ - filename: "Codestral-22B-v0.1-Q4_K_M.gguf" uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf" sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c +- !!merge <<: *codellama + url: "github:mudler/LocalAI/gallery/alpaca.yaml@master" + icon: https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1/resolve/main/LeetCodeWizardLogo.png + name: "leetcodewizard_7b_v1.1-i1" + urls: + - https://huggingface.co/Nan-Do/LeetCodeWizard_7B_V1.1 + - https://huggingface.co/mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF + description: | + LeetCodeWizard is a coding large language model specifically trained to solve and explain Leetcode (or any) programming problems. + This model is a fine-tuned version of the WizardCoder-Python-7B with a dataset of Leetcode problems\ + Model capabilities: + + It should be able to solve most of the problems found at Leetcode and even pass the sample interviews they offer on the site. + + It can write both the code and the explanations for the solutions. + overrides: + parameters: + model: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf + files: + - filename: LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf + sha256: 19720d8e1ba89d32c6f88ed6518caf0251f9e3ec011297929c801efc5ea979f4 + uri: huggingface://mradermacher/LeetCodeWizard_7B_V1.1-i1-GGUF/LeetCodeWizard_7B_V1.1.i1-Q4_K_M.gguf - &llm-compiler url: "github:mudler/LocalAI/gallery/codellama.yaml@master" name: "llm-compiler-13b-imat" From 115b523732bf52780b6c46ae16930d25a7d8a812 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 16:09:58 +0200 Subject: [PATCH 058/235] models(gallery): add tarnished-9b-i1 (#3096) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 66ab4216..def06b9f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1255,6 +1255,30 @@ - filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218 uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf +- !!merge <<: *gemma + name: "tarnished-9b-i1" + icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg + urls: + - https://huggingface.co/lodrick-the-lafted/tarnished-9b + - https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF + description: | + Ah, so you've heard whispers on the winds, have you? 🧐 + + Imagine this: + Tarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council. + + It's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time. + + But be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness. + + Dare you tread this path? 
+ overrides: + parameters: + model: tarnished-9b.i1-Q4_K_M.gguf + files: + - filename: tarnished-9b.i1-Q4_K_M.gguf + sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c + uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 4c7e8f4d54756706bb99f8a63519c820ffa6377e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 17:06:06 +0200 Subject: [PATCH 059/235] models(gallery): add meta-llama-3-instruct-12.2b-brainstorm-20x-form-8 (#3097) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index def06b9f..a9b3a266 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2409,6 +2409,19 @@ - filename: L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf sha256: ae29f38d73dfb04415821405cf8b319fc42d78d0cdd0da91db147d12e68030fe uri: huggingface://DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-GGUF/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B-D_AU-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "meta-llama-3-instruct-12.2b-brainstorm-20x-form-8" + urls: + - https://huggingface.co/DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF + description: | + Meta-Llama-3-8B Instruct (now at 12.2B) with Brainstorm process that increases its performance at the core level for any creative use case. It has calibrations that allow it to exceed the logic solving abilities of the original model. The Brainstorm process expands the reasoning center of the LLM, reassembles and calibrates it, introducing subtle changes into the reasoning process. This enhances the model's detail, concept, connection to the "world", general concept connections, prose quality, and prose length without affecting instruction following. It improves coherence, description, simile, metaphors, emotional engagement, and takes fewer liberties with instructions while following them more closely. The model's performance is further enhanced by other technologies like "Ultra" (precision), "Neo Imatrix" (custom imatrix datasets), and "X-quants" (custom application of the imatrix process). It has been tested on multiple LLaMA2, LLaMA3, and Mistral models of various parameter sizes. 
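
A quick sanity check that applies to any of these additions: after installation the entry should show up, under the same name: field, in the OpenAI-compatible model listing (again assuming a local instance on the default port):

    # the new entry should appear by its gallery name in the returned JSON
    curl http://localhost:8080/v1/models
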
+ overrides: + parameters: + model: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf + files: + - filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf + sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999 + uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" From 05c75ca617bbe3da181e2fbc560259a5b095eea2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 17:10:31 +0200 Subject: [PATCH 060/235] models(gallery): add loki-base-i1 (#3098) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a9b3a266..7828f953 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2422,6 +2422,68 @@ - filename: Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf sha256: 5568ab6195ab5da703f728cc118108ddcbe97255e3ba4a543b531acdf082b999 uri: huggingface://DavidAU/Meta-Llama-3-Instruct-12.2B-BRAINSTORM-20x-FORM-8-GGUF/Meta-Llama-3-8B-Instruct-exp20-8-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "loki-base-i1" + urls: + - https://huggingface.co/MrRobotoAI/Loki-base + - https://huggingface.co/mradermacher/Loki-base-i1-GGUF + description: | + Merge of several models using mergekit: + - model: abacusai/Llama-3-Smaug-8B + - model: Aculi/Llama3-Sophie + - model: ajibawa-2023/Uncensored-Frank-Llama-3-8B + - model: Blackroot/Llama-3-Gamma-Twist + - model: Casual-Autopsy/L3-Super-Nova-RP-8B + - model: Casual-Autopsy/L3-Umbral-Mind-RP-v3.0-8B + - model: cgato/L3-TheSpice-8b-v0.8.3 + - model: ChaoticNeutrals/Hathor_Respawn-L3-8B-v0.8 + - model: ChaoticNeutrals/Hathor_RP-v.01-L3-8B + - model: chargoddard/prometheus-2-llama-3-8b + - model: chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO + - model: chujiezheng/LLaMA3-iterative-DPO-final-ExPO + - model: Fizzarolli/L3-8b-Rosier-v1 + - model: flammenai/Mahou-1.2a-llama3-8B + - model: HaitameLaf/Llama-3-8B-StoryGenerator + - model: HPAI-BSC/Llama3-Aloe-8B-Alpha + - model: iRyanBell/ARC1 + - model: iRyanBell/ARC1-II + - model: lemon07r/Llama-3-RedMagic4-8B + - model: lemon07r/Lllama-3-RedElixir-8B + - model: Locutusque/Llama-3-Hercules-5.0-8B + - model: Magpie-Align/Llama-3-8B-Magpie-Pro-MT-SFT-v0.1 + - model: maldv/badger-lambda-llama-3-8b + - model: maldv/badger-mu-llama-3-8b + - model: maldv/badger-writer-llama-3-8b + - model: mlabonne/NeuralDaredevil-8B-abliterated + - model: MrRobotoAI/Fiction-Writer-6 + - model: MrRobotoAI/Unholy-Thoth-8B-v2 + - model: nbeerbower/llama-3-spicy-abliterated-stella-8B + - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1 + - model: NeverSleep/Llama-3-Lumimaid-8B-v0.1-OAS + - model: Nitral-AI/Hathor_Sofit-L3-8B-v1 + - model: Nitral-AI/Hathor_Stable-v0.2-L3-8B + - model: Nitral-AI/Hathor_Tahsin-L3-8B-v0.85 + - model: Nitral-AI/Poppy_Porpoise-0.72-L3-8B + - model: nothingiisreal/L3-8B-Instruct-Abliterated-DWP + - model: nothingiisreal/L3-8B-Stheno-Horny-v3.3-32K + - model: NousResearch/Hermes-2-Theta-Llama-3-8B + - model: OwenArli/Awanllm-Llama-3-8B-Cumulus-v1.0 + - model: refuelai/Llama-3-Refueled + - model: ResplendentAI/Nymph_8B + - model: shauray/Llama3-8B-DPO-uncensored + - model: SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha + - model: TIGER-Lab/MAmmoTH2-8B-Plus + - model: Undi95/Llama-3-LewdPlay-8B + - model: Undi95/Meta-Llama-3-8B-hf + - model: 
VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct + - model: WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0 + overrides: + parameters: + model: Loki-base.i1-Q4_K_M.gguf + files: + - filename: Loki-base.i1-Q4_K_M.gguf + sha256: 60a4357fa399bfd18aa841cc529da09439791331d117a4f06f0467d002b385bb + uri: huggingface://mradermacher/Loki-base-i1-GGUF/Loki-base.i1-Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" From c492a9735af338b4c2f852b1acdda8d495ff149f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 31 Jul 2024 17:14:46 +0200 Subject: [PATCH 061/235] models(gallery): add tifa (#3099) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7828f953..776fd3aa 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -662,6 +662,25 @@ - filename: magnum-32b-v1.i1-Q4_K_M.gguf sha256: a31704ce0d7e5b774f155522b9ab7ef6015a4ece4e9056bf4dfc6cac561ff0a3 uri: huggingface://mradermacher/magnum-32b-v1-i1-GGUF/magnum-32b-v1.i1-Q4_K_M.gguf +- !!merge <<: *qwen2 + name: "tifa-7b-qwen2-v0.1" + urls: + - https://huggingface.co/Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF + description: | + The Tifa role-playing language model is a high-performance language model based on a self-developed 220B model distillation, with a new base model of qwen2-7B. The model has been converted to gguf format for running in the Ollama framework, providing excellent dialogue and text generation capabilities. + + The original model was trained on a large-scale industrial dataset and then fine-tuned with 400GB of novel data and 20GB of multi-round dialogue directive data to achieve good role-playing effects. + + The Tifa model is suitable for multi-round dialogue processing, role-playing and scenario simulation, EFX industrial knowledge integration, and high-quality literary creation. + + Note: The Tifa model is in Chinese and English, with 7.6% of the data in Chinese role-playing and 4.2% in English role-playing. The model has been trained with a mix of EFX industrial field parameters and question-answer dialogues generated from 220B model outputs since 2023. The recommended quantization method is f16, as it retains more detail and accuracy in the model's performance. 
+  overrides:
+    parameters:
+      model: tifa-7b-qwen2-v0.1.q4_k_m.gguf
+    files:
+      - filename: tifa-7b-qwen2-v0.1.q4_k_m.gguf
+        sha256: 1f5adbe8cb0a6400f51abdca3bf4e32284ebff73cc681a43abb35c0a6ccd3820
+        uri: huggingface://Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF/tifa-7b-qwen2-v0.1.q4_k_m.gguf
 - &mistral03
   ## START Mistral
   url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"

From af0545834fd565ab56af0b9348550ca9c3cb5349 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Thu, 1 Aug 2024 02:55:09 +0200
Subject: [PATCH 062/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3102)

:arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 607389f1..7927d7fa 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=7e72aa74fd676a093eb9970e761085ec22734c71
+CPPLLAMA_VERSION?=ed9d2854c9de4ae1f448334294e61167b04bec2a

 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all

From 26f393bd99b9bafbed6a4627e7cb8bd6d373bca5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 1 Aug 2024 09:35:43 +0200
Subject: [PATCH 063/235] models(gallery): add meta-llama-3.1-instruct-9.99b-brainstorm-10x-form-3 (#3103)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 776fd3aa..67f96d2e 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -238,6 +238,19 @@
 - filename: Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
   sha256: 395cff3cce2b092f840b68eb6e31f4c8b670bc8e3854bbb230df8334369e671d
   uri: huggingface://mradermacher/Llama3.1-70B-Chinese-Chat-GGUF/Llama3.1-70B-Chinese-Chat.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "meta-llama-3.1-instruct-9.99b-brainstorm-10x-form-3"
+  urls:
+    - https://huggingface.co/DavidAU/Meta-Llama-3.1-Instruct-9.99B-BRAINSTORM-10x-FORM-3-GGUF
+  description: |
+    The Meta-Llama-3.1-8B Instruct model is a large language model trained on a diverse range of text data, with the goal of generating high-quality and coherent text in response to user input. This model is enhanced through a process called "Brainstorm", which involves expanding and recalibrating the model's reasoning center to improve its creative and generative capabilities. The resulting model is capable of generating detailed, vivid, and nuanced text, with a focus on prose quality, conceptually complex responses, and a deeper understanding of the user's intent. The Brainstorm process is designed to enhance the model's performance in creative writing, roleplaying, and story generation, and to improve its ability to generate coherent and engaging text in a wide range of contexts. The model is based on the Llama3 architecture and has been fine-tuned using the Instruct framework, which provides it with a strong foundation for understanding natural language instructions and generating appropriate responses. The model can be used for a variety of tasks, including creative writing, generating coherent and detailed text, exploring different perspectives and scenarios, and brainstorming ideas.
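
Stepping back to the Makefile change in PATCH 062 above: the pinned revisions are ?= assignments, so a one-off build can override them from the make command line without editing the file. A sketch, assuming a build from the repository root:

    # build against a different pinned llama.cpp revision
    make CPPLLAMA_VERSION=ed9d2854c9de4ae1f448334294e61167b04bec2a build
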
+ overrides: + parameters: + model: Meta-Llama-3.1-8B-Instruct-Instruct-exp10-3-Q4_K_M.gguf + files: + - filename: Meta-Llama-3.1-8B-Instruct-Instruct-exp10-3-Q4_K_M.gguf + sha256: f52ff984100b1ff6acfbd7ed1df770064118274a54ae5d48749400a662113615 + uri: huggingface://DavidAU/Meta-Llama-3.1-Instruct-9.99B-BRAINSTORM-10x-FORM-3-GGUF/Meta-Llama-3.1-8B-Instruct-Instruct-exp10-3-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From d590532d7f79cba07fef718d410f3dd6efee46d6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Aug 2024 09:56:23 +0200 Subject: [PATCH 064/235] models(gallery): add mn-12b-celeste-v1.9 (#3104) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 67f96d2e..5920835e 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -816,6 +816,26 @@ - filename: lumimaid-v0.2-12b-q4_k_m.gguf sha256: f72299858a07e52be920b86d42ddcfcd5008b961d601ef6fd6a98a3377adccbf uri: huggingface://mudler/Lumimaid-v0.2-12B-Q4_K_M-GGUF/lumimaid-v0.2-12b-q4_k_m.gguf +- !!merge <<: *mistral03 + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "mn-12b-celeste-v1.9" + icon: https://cdn-uploads.huggingface.co/production/uploads/630cf5d14ca0a22768bbe10c/QcU3xEgVu18jeFtMFxIw-.webp + urls: + - https://huggingface.co/nothingiisreal/MN-12B-Celeste-V1.9 + - https://huggingface.co/mradermacher/MN-12B-Celeste-V1.9-GGUF + description: | + Mistral Nemo 12B Celeste V1.9 + + This is a story writing and roleplaying model trained on Mistral NeMo 12B Instruct at 8K context using Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned + + This version has improved NSFW, smarter and more active narration. It's also trained with ChatML tokens so there should be no EOS bleeding whatsoever. + overrides: + parameters: + model: MN-12B-Celeste-V1.9.Q4_K_M.gguf + files: + - filename: MN-12B-Celeste-V1.9.Q4_K_M.gguf + sha256: 019daeaa63d82d55d1ea623b9c255deea6793af4044bb4994d2b4d09e8959f7b + uri: huggingface://mradermacher/MN-12B-Celeste-V1.9-GGUF/MN-12B-Celeste-V1.9.Q4_K_M.gguf - &mudler ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" From e4b91e9dbb982f0d11e3fd989aadf23d7777c2f4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Aug 2024 09:58:28 +0200 Subject: [PATCH 065/235] models(gallery): add shieldgemma (#3105) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5920835e..a797aeda 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1331,6 +1331,20 @@ - filename: tarnished-9b.i1-Q4_K_M.gguf sha256: 62ab09124b3f6698bd94ef966533ae5d427d87f6bdc09f6f46917def96420a0c uri: huggingface://mradermacher/tarnished-9b-i1-GGUF/tarnished-9b.i1-Q4_K_M.gguf +- !!merge <<: *gemma + name: "shieldgemma-9b-i1" + urls: + - https://huggingface.co/google/shieldgemma-9b + - https://huggingface.co/mradermacher/shieldgemma-9b-i1-GGUF + description: | + ShieldGemma is a series of safety content moderation models built upon Gemma 2 that target four harm categories (sexually explicit, dangerous content, hate, and harassment). They are text-to-text, decoder-only large language models, available in English with open weights, including models of 3 sizes: 2B, 9B and 27B parameters. 
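
ShieldGemma is a safety classifier rather than a chat assistant, and the exact policy-prompt format it expects is defined on the upstream model card, not in this entry. The request below is therefore only a sketch of where such a model slots into the same OpenAI-compatible endpoint, with placeholder content:

    curl http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "shieldgemma-9b-i1", "messages": [{"role": "user", "content": "<guideline text plus the content to classify, per the model card>"}]}'
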
+ overrides: + parameters: + model: shieldgemma-9b.i1-Q4_K_M.gguf + files: + - filename: shieldgemma-9b.i1-Q4_K_M.gguf + sha256: ffa7eaadcc0c7d0544fda5b0d86bba3ffa3431b673e5b2135f421cfe65bd8732 + uri: huggingface://mradermacher/shieldgemma-9b-i1-GGUF/shieldgemma-9b.i1-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From d792cf115b2e11cacffaa19707a0e3f42e5e85f8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Aug 2024 17:27:40 +0200 Subject: [PATCH 066/235] fix(ui): do not show duplicate entries if not installed by gallery (#3107) Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/localai/welcome.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 5d217173..396c4084 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -17,7 +17,10 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, backendConfigs := cl.GetAllBackendConfigs() galleryConfigs := map[string]*gallery.Config{} + modelsWithBackendConfig := map[string]interface{}{} + for _, m := range backendConfigs { + modelsWithBackendConfig[m.Name] = nil cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) if err != nil { @@ -32,7 +35,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, modelsWithoutConfig := []string{} for _, m := range models { - if _, ok := galleryConfigs[m]; !ok { + if _, ok := modelsWithBackendConfig[m]; !ok { modelsWithoutConfig = append(modelsWithoutConfig, m) } } From 5afd2de87e88da7af3cb9ffeba94a618063a66f0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 1 Aug 2024 18:44:39 +0200 Subject: [PATCH 067/235] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 765fe5df..9119946d 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Other: ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) +- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/) - 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/) - [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/) - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance) From 01d83129a23f2173e71d9f1d6e7094a7bad71489 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 2 Aug 2024 00:09:50 +0200 Subject: [PATCH 068/235] docs: :arrow_up: update docs version mudler/LocalAI (#3109) :arrow_up: Update docs version mudler/LocalAI Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 94160f08..d07ef798 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.19.3" + "version": "v2.19.4" } From 4c8957de63fefb9ac20cfe77aae9c5f1c23adc70 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 2 Aug 2024 00:42:44 +0200 Subject: [PATCH 069/235] 
chore: :arrow_up: Update ggerganov/llama.cpp (#3110) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7927d7fa..be104116 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=ed9d2854c9de4ae1f448334294e61167b04bec2a +CPPLLAMA_VERSION?=b7a08fd5e0e7c898c68d1743066ea495202d9608 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 2b55dd2c4f6eed20224764162b787a3f76cf4b49 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Aug 2024 10:51:09 +0200 Subject: [PATCH 070/235] models(gallery): add llama-3.1-techne-rp-8b-v1 (#3112) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a797aeda..4f5caebd 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -251,6 +251,32 @@ - filename: Meta-Llama-3.1-8B-Instruct-Instruct-exp10-3-Q4_K_M.gguf sha256: f52ff984100b1ff6acfbd7ed1df770064118274a54ae5d48749400a662113615 uri: huggingface://DavidAU/Meta-Llama-3.1-Instruct-9.99B-BRAINSTORM-10x-FORM-3-GGUF/Meta-Llama-3.1-8B-Instruct-Instruct-exp10-3-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "llama-3.1-techne-rp-8b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/633a809fa4a8f33508dce32c/BMdwgJ6cHZWbiGL48Q-Wq.png + urls: + - https://huggingface.co/athirdpath/Llama-3.1-Techne-RP-8b-v1 + - https://huggingface.co/mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF + description: | + athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit was further trained in the order below: + SFT + + Doctor-Shotgun/no-robots-sharegpt + grimulkan/LimaRP-augmented + Inv/c2-logs-cleaned-deslopped + + DPO + + jondurbin/truthy-dpo-v0.1 + Undi95/Weyaxi-humanish-dpo-project-noemoji + athirdpath/DPO_Pairs-Roleplay-Llama3-NSFW + overrides: + parameters: + model: Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf + files: + - filename: Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf + sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f + uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From fc50a90f6a556ca56c5f61bf2d734a65692a25df Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Aug 2024 12:45:22 +0200 Subject: [PATCH 071/235] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9119946d..ce3289f9 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu Hot topics (looking for contributors): +- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113 - WebUI improvements: https://github.com/mudler/LocalAI/issues/2156 - Backends v2: https://github.com/mudler/LocalAI/issues/1126 - Improving UX v2: https://github.com/mudler/LocalAI/issues/1373 From a36b721ca63436d72d18db7c39df47b506fcaba5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 2 Aug 2024 20:06:25 +0200 Subject: [PATCH 072/235] 
fix: be consistent in downloading files, check for scanner errors (#3108) * fix(downloader): be consistent in downloading files This PR puts some order in the downloader such as functions are re-used across several places. This fixes an issue with having uri's inside the model YAML file, it would resolve to MD5 rather then using the filename Signed-off-by: Ettore Di Giacinto * fix(scanner): do raise error only if unsafeFiles are found Fixes: https://github.com/mudler/LocalAI/issues/3114 Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/cli/models.go | 4 +- core/cli/util.go | 4 +- core/config/backend_config.go | 27 +++-- core/config/backend_config_loader.go | 10 +- core/dependencies_manager/manager.go | 3 +- core/gallery/gallery.go | 10 +- core/gallery/models.go | 11 +- core/http/app_test.go | 3 +- embedded/embedded.go | 4 +- pkg/downloader/huggingface.go | 49 +++++++++ pkg/downloader/uri.go | 157 ++++++++++----------------- pkg/downloader/uri_test.go | 10 +- pkg/startup/model_preload.go | 52 +++------ 13 files changed, 173 insertions(+), 171 deletions(-) create mode 100644 pkg/downloader/huggingface.go diff --git a/core/cli/models.go b/core/cli/models.go index 03047018..56d13fc7 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -83,7 +83,9 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { return err } - if !downloader.LooksLikeOCI(modelName) { + modelURI := downloader.URI(modelName) + + if !modelURI.LooksLikeOCI() { model := gallery.FindModel(models, modelName, mi.ModelsPath) if model == nil { log.Error().Str("model", modelName).Msg("model not found") diff --git a/core/cli/util.go b/core/cli/util.go index a7204092..b3e545d8 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { var errs error = nil for _, uri := range hfscmd.ToScan { log.Info().Str("uri", uri).Msg("scanning specific uri") - scanResults, err := downloader.HuggingFaceScan(uri) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! 
A known-vulnerable model is included in this repo!") errs = errors.Join(errs, err) } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 383686cd..b83e1a98 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -8,7 +8,6 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" - "github.com/mudler/LocalAI/pkg/utils" ) const ( @@ -72,9 +71,9 @@ type BackendConfig struct { } type File struct { - Filename string `yaml:"filename" json:"filename"` - SHA256 string `yaml:"sha256" json:"sha256"` - URI string `yaml:"uri" json:"uri"` + Filename string `yaml:"filename" json:"filename"` + SHA256 string `yaml:"sha256" json:"sha256"` + URI downloader.URI `yaml:"uri" json:"uri"` } type VallE struct { @@ -213,28 +212,32 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool { // MMProjFileName returns the filename of the MMProj file // If the MMProj is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) MMProjFileName() string { - modelURL := downloader.ConvertURL(c.MMProj) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.MMProj) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.MMProj } func (c *BackendConfig) IsMMProjURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj)) + uri := downloader.URI(c.MMProj) + return uri.LooksLikeURL() } func (c *BackendConfig) IsModelURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.Model)) + uri := downloader.URI(c.Model) + return uri.LooksLikeURL() } // ModelFileName returns the filename of the model // If the model is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) ModelFileName() string { - modelURL := downloader.ConvertURL(c.Model) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.Model) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.Model diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 283dac52..45fe259e 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -244,7 +244,7 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { + if err := file.URI.DownloadFile(filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { return err } } @@ -252,10 +252,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // If the model is an URL, expand it, and download the file if config.IsModelURL() { modelFileName := config.ModelFileName() - modelURL := downloader.ConvertURL(config.Model) + uri := downloader.URI(config.Model) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } @@ -269,10 +269,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { if config.IsMMProjURL() { modelFileName := config.MMProjFileName() - modelURL := 
downloader.ConvertURL(config.MMProj) + uri := downloader.URI(config.MMProj) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } diff --git a/core/dependencies_manager/manager.go b/core/dependencies_manager/manager.go index b86139e0..8434f721 100644 --- a/core/dependencies_manager/manager.go +++ b/core/dependencies_manager/manager.go @@ -37,7 +37,8 @@ func main() { // download the assets for _, asset := range assets { - if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil { + uri := downloader.URI(asset.URL) + if err := uri.DownloadFile(filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil { panic(err) } } diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index 9288c44f..6ced6244 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -131,7 +131,8 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -153,8 +154,9 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, return models, err } } + uri := downloader.URI(gallery.URL) - err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error { + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { @@ -252,8 +254,8 @@ func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error func SafetyScanGalleryModel(galleryModel *GalleryModel) error { for _, file := range galleryModel.AdditionalFiles { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } diff --git a/core/gallery/models.go b/core/gallery/models.go index 32460a9c..dec6312e 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -68,7 +68,8 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { @@ -118,14 +119,14 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides filePath := filepath.Join(basePath, file.Filename) if enforceScan { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != 
nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } } - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { + uri := downloader.URI(file.URI) + if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } } diff --git a/core/http/app_test.go b/core/http/app_test.go index 3fb16581..b21ad25a 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -73,8 +73,9 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { + uri := downloader.URI(url) // TODO: No tests currently seem to exercise file:// urls. Fix? - downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) diff --git a/embedded/embedded.go b/embedded/embedded.go index d5fd72df..672c32ed 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -38,8 +38,8 @@ func init() { func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { remoteLibrary := map[string]string{} - - err := downloader.DownloadAndUnmarshal(url, basePath, func(_ string, i []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(_ string, i []byte) error { return yaml.Unmarshal(i, &remoteLibrary) }) if err != nil { diff --git a/pkg/downloader/huggingface.go b/pkg/downloader/huggingface.go new file mode 100644 index 00000000..34ba9bd9 --- /dev/null +++ b/pkg/downloader/huggingface.go @@ -0,0 +1,49 @@ +package downloader + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" +) + +type HuggingFaceScanResult struct { + RepositoryId string `json:"repositoryId"` + Revision string `json:"revision"` + HasUnsafeFiles bool `json:"hasUnsafeFile"` + ClamAVInfectedFiles []string `json:"clamAVInfectedFiles"` + DangerousPickles []string `json:"dangerousPickles"` + ScansDone bool `json:"scansDone"` +} + +var ErrNonHuggingFaceFile = errors.New("not a huggingface repo") +var ErrUnsafeFilesFound = errors.New("unsafe files found") + +func HuggingFaceScan(uri URI) (*HuggingFaceScanResult, error) { + cleanParts := strings.Split(uri.ResolveURL(), "/") + if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" { + return nil, ErrNonHuggingFaceFile + } + results, err := http.Get(fmt.Sprintf("https://huggingface.co/api/models/%s/%s/scan", cleanParts[3], cleanParts[4])) + if err != nil { + return nil, err + } + if results.StatusCode != 200 { + return nil, fmt.Errorf("unexpected status code during HuggingFaceScan: %d", results.StatusCode) + } + scanResult := &HuggingFaceScanResult{} + bodyBytes, err := io.ReadAll(results.Body) + if err != nil { + return nil, err + } + err = json.Unmarshal(bodyBytes, scanResult) + if err != nil { + return nil, err + } + if scanResult.HasUnsafeFiles { + return scanResult, ErrUnsafeFilesFound + } + return scanResult, nil +} diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 1f88bbb1..7fedd646 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ 
-2,12 +2,10 @@ package downloader import ( "crypto/sha256" - "encoding/base64" - "encoding/json" - "errors" "fmt" "io" "net/http" + "net/url" "os" "path/filepath" "strconv" @@ -28,13 +26,16 @@ const ( HTTPSPrefix = "https://" GithubURI = "github:" GithubURI2 = "github://" + LocalPrefix = "file://" ) -func DownloadAndUnmarshal(url string, basePath string, f func(url string, i []byte) error) error { - url = ConvertURL(url) +type URI string - if strings.HasPrefix(url, "file://") { - rawURL := strings.TrimPrefix(url, "file://") +func (uri URI) DownloadAndUnmarshal(basePath string, f func(url string, i []byte) error) error { + url := uri.ResolveURL() + + if strings.HasPrefix(url, LocalPrefix) { + rawURL := strings.TrimPrefix(url, LocalPrefix) // checks if the file is symbolic, and resolve if so - otherwise, this function returns the path unmodified. resolvedFile, err := filepath.EvalSymlinks(rawURL) if err != nil { @@ -78,24 +79,54 @@ func DownloadAndUnmarshal(url string, basePath string, f func(url string, i []by return f(url, body) } -func LooksLikeURL(s string) bool { - return strings.HasPrefix(s, HTTPPrefix) || - strings.HasPrefix(s, HTTPSPrefix) || - strings.HasPrefix(s, HuggingFacePrefix) || - strings.HasPrefix(s, GithubURI) || - strings.HasPrefix(s, OllamaPrefix) || - strings.HasPrefix(s, OCIPrefix) || - strings.HasPrefix(s, GithubURI2) +func (u URI) FilenameFromUrl() (string, error) { + f, err := filenameFromUrl(string(u)) + if err != nil || f == "" { + f = utils.MD5(string(u)) + if strings.HasSuffix(string(u), ".yaml") || strings.HasSuffix(string(u), ".yml") { + f = f + ".yaml" + } + err = nil + } + + return f, err } -func LooksLikeOCI(s string) bool { - return strings.HasPrefix(s, OCIPrefix) || strings.HasPrefix(s, OllamaPrefix) +func filenameFromUrl(urlstr string) (string, error) { + // strip anything after @ + if strings.Contains(urlstr, "@") { + urlstr = strings.Split(urlstr, "@")[0] + } + + u, err := url.Parse(urlstr) + if err != nil { + return "", fmt.Errorf("error due to parsing url: %w", err) + } + x, err := url.QueryUnescape(u.EscapedPath()) + if err != nil { + return "", fmt.Errorf("error due to escaping: %w", err) + } + return filepath.Base(x), nil } -func ConvertURL(s string) string { +func (u URI) LooksLikeURL() bool { + return strings.HasPrefix(string(u), HTTPPrefix) || + strings.HasPrefix(string(u), HTTPSPrefix) || + strings.HasPrefix(string(u), HuggingFacePrefix) || + strings.HasPrefix(string(u), GithubURI) || + strings.HasPrefix(string(u), OllamaPrefix) || + strings.HasPrefix(string(u), OCIPrefix) || + strings.HasPrefix(string(u), GithubURI2) +} + +func (s URI) LooksLikeOCI() bool { + return strings.HasPrefix(string(s), OCIPrefix) || strings.HasPrefix(string(s), OllamaPrefix) +} + +func (s URI) ResolveURL() string { switch { - case strings.HasPrefix(s, GithubURI2): - repository := strings.Replace(s, GithubURI2, "", 1) + case strings.HasPrefix(string(s), GithubURI2): + repository := strings.Replace(string(s), GithubURI2, "", 1) repoParts := strings.Split(repository, "@") branch := "main" @@ -110,8 +141,8 @@ func ConvertURL(s string) string { projectPath := strings.Join(repoPath[2:], "/") return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath) - case strings.HasPrefix(s, GithubURI): - parts := strings.Split(s, ":") + case strings.HasPrefix(string(s), GithubURI): + parts := strings.Split(string(s), ":") repoParts := strings.Split(parts[1], "@") branch := "main" @@ -125,8 +156,8 @@ func ConvertURL(s string) string { 
projectPath := strings.Join(repoPath[2:], "/") return fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath) - case strings.HasPrefix(s, HuggingFacePrefix): - repository := strings.Replace(s, HuggingFacePrefix, "", 1) + case strings.HasPrefix(string(s), HuggingFacePrefix): + repository := strings.Replace(string(s), HuggingFacePrefix, "", 1) // convert repository to a full URL. // e.g. TheBloke/Mixtral-8x7B-v0.1-GGUF/mixtral-8x7b-v0.1.Q2_K.gguf@main -> https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q2_K.gguf owner := strings.Split(repository, "/")[0] @@ -144,7 +175,7 @@ func ConvertURL(s string) string { return fmt.Sprintf("https://huggingface.co/%s/%s/resolve/%s/%s", owner, repo, branch, filepath) } - return s + return string(s) } func removePartialFile(tmpFilePath string) error { @@ -161,9 +192,9 @@ func removePartialFile(tmpFilePath string) error { return nil } -func DownloadFile(url string, filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error { - url = ConvertURL(url) - if LooksLikeOCI(url) { +func (uri URI) DownloadFile(filePath, sha string, fileN, total int, downloadStatus func(string, string, string, float64)) error { + url := uri.ResolveURL() + if uri.LooksLikeOCI() { progressStatus := func(desc ocispec.Descriptor) io.Writer { return &progressWriter{ fileName: filePath, @@ -298,37 +329,6 @@ func DownloadFile(url string, filePath, sha string, fileN, total int, downloadSt return nil } -// this function check if the string is an URL, if it's an URL downloads the image in memory -// encodes it in base64 and returns the base64 string -func GetBase64Image(s string) (string, error) { - if strings.HasPrefix(s, "http") { - // download the image - resp, err := http.Get(s) - if err != nil { - return "", err - } - defer resp.Body.Close() - - // read the image data into memory - data, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - - // encode the image data in base64 - encoded := base64.StdEncoding.EncodeToString(data) - - // return the base64 string - return encoded, nil - } - - // if the string instead is prefixed with "data:image/jpeg;base64,", drop it - if strings.HasPrefix(s, "data:image/jpeg;base64,") { - return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil - } - return "", fmt.Errorf("not valid string") -} - func formatBytes(bytes int64) string { const unit = 1024 if bytes < unit { @@ -356,42 +356,3 @@ func calculateSHA(filePath string) (string, error) { return fmt.Sprintf("%x", hash.Sum(nil)), nil } - -type HuggingFaceScanResult struct { - RepositoryId string `json:"repositoryId"` - Revision string `json:"revision"` - HasUnsafeFiles bool `json:"hasUnsafeFile"` - ClamAVInfectedFiles []string `json:"clamAVInfectedFiles"` - DangerousPickles []string `json:"dangerousPickles"` - ScansDone bool `json:"scansDone"` -} - -var ErrNonHuggingFaceFile = errors.New("not a huggingface repo") -var ErrUnsafeFilesFound = errors.New("unsafe files found") - -func HuggingFaceScan(uri string) (*HuggingFaceScanResult, error) { - cleanParts := strings.Split(ConvertURL(uri), "/") - if len(cleanParts) <= 4 || cleanParts[2] != "huggingface.co" { - return nil, ErrNonHuggingFaceFile - } - results, err := http.Get(fmt.Sprintf("https://huggingface.co/api/models/%s/%s/scan", cleanParts[3], cleanParts[4])) - if err != nil { - return nil, err - } - if results.StatusCode != 200 { - return nil, fmt.Errorf("unexpected status code during HuggingFaceScan: %d", 
results.StatusCode) - } - scanResult := &HuggingFaceScanResult{} - bodyBytes, err := io.ReadAll(results.Body) - if err != nil { - return nil, err - } - err = json.Unmarshal(bodyBytes, scanResult) - if err != nil { - return nil, err - } - if scanResult.HasUnsafeFiles { - return scanResult, ErrUnsafeFilesFound - } - return scanResult, nil -} diff --git a/pkg/downloader/uri_test.go b/pkg/downloader/uri_test.go index 66a4cb4e..21a093a9 100644 --- a/pkg/downloader/uri_test.go +++ b/pkg/downloader/uri_test.go @@ -9,24 +9,28 @@ import ( var _ = Describe("Gallery API tests", func() { Context("URI", func() { It("parses github with a branch", func() { + uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml") Expect( - DownloadAndUnmarshal("github:go-skynet/model-gallery/gpt4all-j.yaml", "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), ).ToNot(HaveOccurred()) }) It("parses github without a branch", func() { + uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml@main") + Expect( - DownloadAndUnmarshal("github:go-skynet/model-gallery/gpt4all-j.yaml@main", "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), ).ToNot(HaveOccurred()) }) It("parses github with urls", func() { + uri := URI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml") Expect( - DownloadAndUnmarshal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index 9fa890b0..a445b10e 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -3,7 +3,6 @@ package startup import ( "errors" "fmt" - "net/url" "os" "path/filepath" "strings" @@ -23,21 +22,21 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath // create an error that groups all errors var err error - for _, url := range models { + lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) + for _, url := range models { // As a best effort, try to resolve the model from the remote library // if it's not resolved we try with the other method below if modelLibraryURL != "" { - lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) - if err == nil { - if lib[url] != "" { - log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) - url = lib[url] - } + if lib[url] != "" { + log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) + url = lib[url] } } url = embedded.ModelShortURL(url) + uri := downloader.URI(url) + switch { case embedded.ExistsInModelsLibrary(url): modelYAML, e := embedded.ResolveContent(url) @@ -55,7 +54,7 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition") err = errors.Join(err, e) } - case downloader.LooksLikeOCI(url): + case uri.LooksLikeOCI(): log.Debug().Msgf("[startup] resolved OCI 
model to download: %s", url) // convert OCI image name to a file name. @@ -67,7 +66,7 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath // check if file exists if _, e := os.Stat(filepath.Join(modelPath, ociName)); errors.Is(e, os.ErrNotExist) { modelDefinitionFilePath := filepath.Join(modelPath, ociName) - e := downloader.DownloadFile(url, modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) { + e := uri.DownloadFile(modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if e != nil { @@ -77,19 +76,15 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath } log.Info().Msgf("[startup] installed model from OCI repository: %s", ociName) - case downloader.LooksLikeURL(url): + case uri.LooksLikeURL(): log.Debug().Msgf("[startup] downloading %s", url) // Extract filename from URL - fileName, e := filenameFromUrl(url) - if e != nil || fileName == "" { - fileName = utils.MD5(url) - if strings.HasSuffix(url, ".yaml") || strings.HasSuffix(url, ".yml") { - fileName = fileName + ".yaml" - } + fileName, e := uri.FilenameFromUrl() + if e != nil { log.Warn().Err(e).Str("url", url).Msg("error extracting filename from URL") - //err = errors.Join(err, e) - //continue + err = errors.Join(err, e) + continue } modelPath := filepath.Join(modelPath, fileName) @@ -102,7 +97,7 @@ func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath // check if file exists if _, e := os.Stat(modelPath); errors.Is(e, os.ErrNotExist) { - e := downloader.DownloadFile(url, modelPath, "", 0, 0, func(fileName, current, total string, percent float64) { + e := uri.DownloadFile(modelPath, "", 0, 0, func(fileName, current, total string, percent float64) { utils.DisplayDownloadFunction(fileName, current, total, percent) }) if e != nil { @@ -167,20 +162,3 @@ func installModel(galleries []config.Gallery, modelName, modelPath string, downl return nil, true } - -func filenameFromUrl(urlstr string) (string, error) { - // strip anything after @ - if strings.Contains(urlstr, "@") { - urlstr = strings.Split(urlstr, "@")[0] - } - - u, err := url.Parse(urlstr) - if err != nil { - return "", fmt.Errorf("error due to parsing url: %w", err) - } - x, err := url.QueryUnescape(u.EscapedPath()) - if err != nil { - return "", fmt.Errorf("error due to escaping: %w", err) - } - return filepath.Base(x), nil -} From 797c1739ce7480b15a9b18ec77adcf5b58e835ce Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 2 Aug 2024 23:54:45 +0200 Subject: [PATCH 073/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3115) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index be104116..77eded2b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b7a08fd5e0e7c898c68d1743066ea495202d9608 +CPPLLAMA_VERSION?=b72c20b85c1029d135022d39e9a20d4807c11893 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From c2576d08798fcf2eabcf2196a70b1eeb92af3b1c Mon Sep 17 00:00:00 2001 
From: Ettore Di Giacinto Date: Sat, 3 Aug 2024 10:36:25 +0200 Subject: [PATCH 074/235] models(gallery): add llama-spark (#3116) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4f5caebd..57881d3b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -277,6 +277,21 @@ - filename: Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf +- !!merge <<: *llama31 + icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg + name: "llama-spark" + urls: + - https://huggingface.co/arcee-ai/Llama-Spark + - https://huggingface.co/arcee-ai/Llama-Spark-GGUF + description: | + Llama-Spark is a powerful conversational AI model developed by Arcee.ai. It's built on the foundation of Llama-3.1-8B and merges the power of our Tome Dataset with Llama-3.1-8B-Instruct, resulting in a remarkable conversationalist that punches well above its 8B parameter weight class. + overrides: + parameters: + model: llama-spark-dpo-v0.3-Q4_K_M.gguf + files: + - filename: llama-spark-dpo-v0.3-Q4_K_M.gguf + sha256: 41367168bbdc4b16eb80efcbee4dacc941781ee8748065940167fe6947b4e4c3 + uri: huggingface://arcee-ai/Llama-Spark-GGUF/llama-spark-dpo-v0.3-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 8f0bf9810af5ad5e53b7d22d89713533feba7985 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 3 Aug 2024 23:47:06 +0200 Subject: [PATCH 075/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3117) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 77eded2b..557ab56d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b72c20b85c1029d135022d39e9a20d4807c11893 +CPPLLAMA_VERSION?=76614f352e94d25659306d9e97321f204e5de0d3 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From d1a123954b252eedeebeb11e32a239faa4dafbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sun, 4 Aug 2024 00:45:42 -0700 Subject: [PATCH 076/235] feat(guesser): add gemma2 (#3118) * feat(guesser): add gemma2 Signed-off-by: Sertac Ozercan * update Signed-off-by: Sertac Ozercan --------- Signed-off-by: Sertac Ozercan --- core/config/guesser.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/config/guesser.go b/core/config/guesser.go index 6c6ef430..b63dd051 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,15 +26,17 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { + RepeatPenalty: 1.0, StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ - Chat: "{{.Input }}\n<|start_of_turn|>model\n", 
- ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>", + Chat: "{{.Input }}\nmodel\n", + ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", Completion: "{{.Input}}", }, }, @@ -192,6 +194,9 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { if len(cfg.StopWords) == 0 { cfg.StopWords = settings.StopWords } + if cfg.RepeatPenalty == 0.0 { + cfg.RepeatPenalty = settings.RepeatPenalty + } } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } @@ -219,7 +224,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType { commandR := arch == "command-r" && eosTokenID == 255001 qwen2 := arch == "qwen2" phi3 := arch == "phi-3" - gemma := strings.HasPrefix(f.Model().Name, "gemma") + gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma") deepseek2 := arch == "deepseek2" switch { From 12d6d2d1779ccf354f803bbebef8be4162cc411b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Aug 2024 14:50:32 +0200 Subject: [PATCH 077/235] models(gallery): add glitz (#3119) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 57881d3b..a0ee1448 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -292,6 +292,21 @@ - filename: llama-spark-dpo-v0.3-Q4_K_M.gguf sha256: 41367168bbdc4b16eb80efcbee4dacc941781ee8748065940167fe6947b4e4c3 uri: huggingface://arcee-ai/Llama-Spark-GGUF/llama-spark-dpo-v0.3-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "l3.1-70b-glitz-v0.2-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/634262af8d8089ebaefd410e/q2dOUnzc1GRbZp3YfzGXB.png + urls: + - https://huggingface.co/Fizzarolli/L3.1-70b-glitz-v0.2 + - https://huggingface.co/mradermacher/L3.1-70b-glitz-v0.2-i1-GGUF + description: | + this is an experimental l3.1 70b finetuning run... that crashed midway through. 
however, the results are still interesting, so i wanted to publish them :3 + overrides: + parameters: + model: L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf + files: + - filename: L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf + sha256: 585efc83e7f6893043be2487fc09c914a381fb463ce97942ef2f25ae85103bcd + uri: huggingface://mradermacher/L3.1-70b-glitz-v0.2-i1-GGUF/L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From 1788fc8d4acd240834e6d396dd3efadad2d191a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Aug 2024 15:17:24 +0200 Subject: [PATCH 078/235] models(gallery): add gemmasutra-mini (#3120) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a0ee1448..7e308d93 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1363,6 +1363,20 @@ - filename: Gemmasutra-Pro-27B-v1.Q4_K_M.gguf sha256: 336a2fbf142849fcc20e432123433807b6c7b09988652ef583a63636a0f90218 uri: huggingface://mradermacher/Gemmasutra-Pro-27B-v1-GGUF/Gemmasutra-Pro-27B-v1.Q4_K_M.gguf +- !!merge <<: *gemma + name: "gemmasutra-mini-2b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/w0Oi8TReoQNT3ljm5Wf6c.webp + urls: + - https://huggingface.co/TheDrummer/Gemmasutra-Mini-2B-v1-GGUF + description: | + It is a small, 2 billion parameter language model that has been trained for role-playing purposes. The model is designed to work well in various settings, such as in the browser, on a laptop, or even on a Raspberry Pi. It has been fine-tuned for RP use and claims to provide a satisfying experience, even in low-resource environments. The model is uncensored and unaligned, and it can be used with the Gemma Instruct template or with chat completion. For the best experience, it is recommended to modify the template to support the `system` role. The model also features examples of its output, highlighting its versatility and creativity. + overrides: + parameters: + model: Gemmasutra-Mini-2B-v1i-Q4_K_M.gguf + files: + - filename: Gemmasutra-Mini-2B-v1i-Q4_K_M.gguf + sha256: 29ba3db911fbadef4452ba757ddd9ce58fb892b7a872f19eefd0743c961797fb + uri: huggingface://TheDrummer/Gemmasutra-Mini-2B-v1-GGUF/Gemmasutra-Mini-2B-v1i-Q4_K_M.gguf - !!merge <<: *gemma name: "tarnished-9b-i1" icon: https://huggingface.co/lodrick-the-lafted/tarnished-9b/resolve/main/nox.jpg From e2e2a8e447e8996d7d3cb4916520a3bc6fa0c2cb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 4 Aug 2024 15:20:02 +0200 Subject: [PATCH 079/235] models(gallery): add kumiho-v1-rp-uwu-8b (#3121) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 7e308d93..bcfa4f35 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -438,6 +438,20 @@ - filename: L3.1-8B-Celeste-V1.5-Q4_K_M.gguf sha256: a408dfbbd91ed5561f70d3129af040dfd06704d6c7fa21146aa9f09714aafbc6 uri: huggingface://bartowski/L3.1-8B-Celeste-V1.5-GGUF/L3.1-8B-Celeste-V1.5-Q4_K_M.gguf +- !!merge <<: *llama31 + icon: https://cdn-uploads.huggingface.co/production/uploads/659c4ecb413a1376bee2f661/szz8sIxofYzSe5XPet2pO.png + name: "kumiho-v1-rp-uwu-8b" + urls: + - https://huggingface.co/juvi21/Kumiho-v1-rp-UwU-8B-GGUF + description: | + Meet Kumiho-V1 uwu. Kumiho-V1-rp-UwU aims to be a generalist model with specialization in roleplay and writing capabilities. 
It is finetuned and merged with various models, with a heavy base of Meta's LLaMA 3.1-8B as base model, and Claude 3.5 Sonnet and Claude 3 Opus generated synthetic data. + overrides: + parameters: + model: Kumiho-v1-rp-UwU-8B-gguf-q4_k_m.gguf + files: + - filename: Kumiho-v1-rp-UwU-8B-gguf-q4_k_m.gguf + sha256: a1deb46675418277cf785a406cd1508fec556ff6e4d45d2231eb2a82986d52d0 + uri: huggingface://juvi21/Kumiho-v1-rp-UwU-8B-GGUF/Kumiho-v1-rp-UwU-8B-gguf-q4_k_m.gguf - &deepseek ## Deepseek url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" From 6e1ec08f46422ef5a2bff868c27599040fb5106c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 4 Aug 2024 23:48:09 +0200 Subject: [PATCH 080/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3123) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 557ab56d..f8155e06 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=76614f352e94d25659306d9e97321f204e5de0d3 +CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f15a93b19b885ad139e12685272dd9ab95de5140 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Aug 2024 10:11:00 +0200 Subject: [PATCH 081/235] models(gallery): add humanish-roleplay-llama-3.1-8b-i1 (#3126) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bcfa4f35..c80455a8 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -307,6 +307,23 @@ - filename: L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf sha256: 585efc83e7f6893043be2487fc09c914a381fb463ce97942ef2f25ae85103bcd uri: huggingface://mradermacher/L3.1-70b-glitz-v0.2-i1-GGUF/L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "humanish-roleplay-llama-3.1-8b-i1" + icon: https://cdn-uploads.huggingface.co/production/uploads/5fad8602b8423e1d80b8a965/VPwtjS3BtjEEEq7ck4kAQ.webp + urls: + - https://huggingface.co/mradermacher/Humanish-Roleplay-Llama-3.1-8B-i1-GGUF + description: | + A DPO-tuned Llama-3.1 to behave more "humanish", i.e., avoiding all the AI assistant slop. It also works for role-play (RP). To achieve this, the model was fine-tuned over a series of datasets: + General conversations from Claude Opus, from Undi95/Meta-Llama-3.1-8B-Claude + Undi95/Weyaxi-humanish-dpo-project-noemoji, to make the model react as a human, rejecting assistant-like or too neutral responses. + ResplendentAI/NSFW_RP_Format_DPO, to steer the model towards using the *action* format in RP settings. 
Works best if in the first message you also use this format naturally (see example) + overrides: + parameters: + model: Humanish-Roleplay-Llama-3.1-8B.i1-Q4_K_M.gguf + files: + - filename: Humanish-Roleplay-Llama-3.1-8B.i1-Q4_K_M.gguf + sha256: 18cf753684e5226b51f3defc708852ca4924f50dc8bc31c9a7d0a036a477b7a7 + uri: huggingface://mradermacher/Humanish-Roleplay-Llama-3.1-8B-i1-GGUF/Humanish-Roleplay-Llama-3.1-8B.i1-Q4_K_M.gguf ## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" From ed322bf59f0dcc4d3c7329c829e52e6cbcd02291 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:38:33 -0500 Subject: [PATCH 082/235] fix: ensure correct version of torch is always installed based on BUILD_TYPE(#2890) * fix: ensure correct version of torch is always installed based on BUILD_TYPE Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> * Move causal-conv1d installation to build_types Signed-off-by: mudler * Move mamba-ssd install to build-type requirements.txt Signed-off-by: mudler --------- Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> Signed-off-by: mudler Co-authored-by: Ettore Di Giacinto Co-authored-by: mudler --- backend/python/autogptq/requirements-cublas11.txt | 2 ++ backend/python/autogptq/requirements-cublas12.txt | 1 + backend/python/autogptq/requirements.txt | 1 - backend/python/bark/requirements-cublas11.txt | 3 +++ backend/python/bark/requirements-cublas12.txt | 2 ++ backend/python/common/libbackend.sh | 7 +++++++ backend/python/coqui/requirements-cublas11.txt | 3 +++ backend/python/coqui/requirements-cublas12.txt | 2 ++ backend/python/diffusers/requirements-cublas11.txt | 2 ++ backend/python/diffusers/requirements-cublas12.txt | 1 + backend/python/diffusers/requirements.txt | 1 - backend/python/exllama/requirements-cublas11.txt | 2 ++ backend/python/exllama/requirements-cublas12.txt | 1 + backend/python/exllama/requirements.txt | 1 - backend/python/exllama2/requirements-cublas11.txt | 2 ++ backend/python/exllama2/requirements-cublas12.txt | 1 + backend/python/exllama2/requirements.txt | 1 - backend/python/mamba/requirements-after.txt | 2 ++ backend/python/mamba/requirements-cpu.txt | 1 + backend/python/mamba/requirements-cublas11.txt | 2 ++ backend/python/mamba/requirements-cublas12.txt | 1 + backend/python/mamba/requirements-install.txt | 3 +-- backend/python/mamba/requirements.txt | 2 -- backend/python/openvoice/requirements-cublas11.txt | 2 ++ backend/python/openvoice/requirements-cublas12.txt | 1 + backend/python/parler-tts/requirements-cublas11.txt | 3 +++ backend/python/parler-tts/requirements-cublas12.txt | 2 ++ backend/python/parler-tts/requirements.txt | 1 - backend/python/petals/requirements-cublas11.txt | 2 ++ backend/python/petals/requirements-cublas12.txt | 1 + backend/python/rerankers/requirements-cublas11.txt | 2 ++ backend/python/rerankers/requirements-cublas12.txt | 1 + .../python/sentencetransformers/requirements-cublas11.txt | 2 ++ .../python/sentencetransformers/requirements-cublas12.txt | 1 + .../python/transformers-musicgen/requirements-cublas11.txt | 2 ++ .../python/transformers-musicgen/requirements-cublas12.txt | 1 + backend/python/transformers-musicgen/requirements.txt | 1 - backend/python/transformers/requirements-cublas11.txt | 2 ++ backend/python/transformers/requirements-cublas12.txt | 1 + backend/python/transformers/requirements.txt | 1 - backend/python/vall-e-x/requirements-cublas11.txt | 3 +++ 
backend/python/vall-e-x/requirements-cublas12.txt | 2 ++ backend/python/vllm/requirements-cublas.txt | 1 - backend/python/vllm/requirements-cublas11.txt | 3 +++ backend/python/vllm/requirements-cublas12.txt | 2 ++ 45 files changed, 69 insertions(+), 12 deletions(-) create mode 100644 backend/python/autogptq/requirements-cublas11.txt create mode 100644 backend/python/autogptq/requirements-cublas12.txt create mode 100644 backend/python/bark/requirements-cublas11.txt create mode 100644 backend/python/bark/requirements-cublas12.txt create mode 100644 backend/python/coqui/requirements-cublas11.txt create mode 100644 backend/python/coqui/requirements-cublas12.txt create mode 100644 backend/python/diffusers/requirements-cublas11.txt create mode 100644 backend/python/diffusers/requirements-cublas12.txt create mode 100644 backend/python/exllama/requirements-cublas11.txt create mode 100644 backend/python/exllama/requirements-cublas12.txt create mode 100644 backend/python/exllama2/requirements-cublas11.txt create mode 100644 backend/python/exllama2/requirements-cublas12.txt create mode 100644 backend/python/mamba/requirements-after.txt create mode 100644 backend/python/mamba/requirements-cpu.txt create mode 100644 backend/python/mamba/requirements-cublas11.txt create mode 100644 backend/python/mamba/requirements-cublas12.txt create mode 100644 backend/python/openvoice/requirements-cublas11.txt create mode 100644 backend/python/openvoice/requirements-cublas12.txt create mode 100644 backend/python/parler-tts/requirements-cublas11.txt create mode 100644 backend/python/parler-tts/requirements-cublas12.txt create mode 100644 backend/python/petals/requirements-cublas11.txt create mode 100644 backend/python/petals/requirements-cublas12.txt create mode 100644 backend/python/rerankers/requirements-cublas11.txt create mode 100644 backend/python/rerankers/requirements-cublas12.txt create mode 100644 backend/python/sentencetransformers/requirements-cublas11.txt create mode 100644 backend/python/sentencetransformers/requirements-cublas12.txt create mode 100644 backend/python/transformers-musicgen/requirements-cublas11.txt create mode 100644 backend/python/transformers-musicgen/requirements-cublas12.txt create mode 100644 backend/python/transformers/requirements-cublas11.txt create mode 100644 backend/python/transformers/requirements-cublas12.txt create mode 100644 backend/python/vall-e-x/requirements-cublas11.txt create mode 100644 backend/python/vall-e-x/requirements-cublas12.txt delete mode 100644 backend/python/vllm/requirements-cublas.txt create mode 100644 backend/python/vllm/requirements-cublas11.txt create mode 100644 backend/python/vllm/requirements-cublas12.txt diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt new file mode 100644 index 00000000..6461b696 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas11.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt new file mode 100644 index 00000000..12c6d5d5 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas12.txt @@ -0,0 +1 @@ +torch diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 7a1bf85f..078c015f 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -2,6 +2,5 @@ accelerate auto-gptq==0.7.1 grpcio==1.65.1 protobuf 
-torch
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt
new file mode 100644
index 00000000..0de92979
--- /dev/null
+++ b/backend/python/bark/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt
new file mode 100644
index 00000000..6c3c7e7a
--- /dev/null
+++ b/backend/python/bark/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh
index e8dfea03..7287fb95 100644
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -122,6 +122,13 @@ function installRequirements() {
         requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
     fi
 
+    # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
+    if [ "x${BUILD_TYPE}" == "x" ]; then
+        requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
+    fi
+
+    requirementFiles+=("${MY_DIR}/requirements-after.txt")
+
     for reqFile in ${requirementFiles[@]}; do
         if [ -f ${reqFile} ]; then
             echo "starting requirements install for ${reqFile}"
diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt
new file mode 100644
index 00000000..0de92979
--- /dev/null
+++ b/backend/python/coqui/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt
new file mode 100644
index 00000000..6c3c7e7a
--- /dev/null
+++ b/backend/python/coqui/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/diffusers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/diffusers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index 6f04d677..ea707bb7 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -8,6 +8,5 @@ opencv-python
 pillow
 protobuf
 sentencepiece
-torch
 transformers
 certifi
diff --git a/backend/python/exllama/requirements-cublas11.txt b/backend/python/exllama/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/exllama/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/exllama/requirements-cublas12.txt b/backend/python/exllama/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/exllama/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt
index 2aab2631..b06efcea 100644
--- a/backend/python/exllama/requirements.txt
+++ b/backend/python/exllama/requirements.txt
@@ -1,6 +1,5 @@
 grpcio==1.65.0
 protobuf
-torch
 transformers
 certifi
 setuptools
\ No newline at end of file
diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/exllama2/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/exllama2/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index 6aae273c..f2dfa976 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -2,6 +2,5 @@ accelerate
 grpcio==1.65.1
 protobuf
 certifi
-torch
 wheel
 setuptools
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt
new file mode 100644
index 00000000..ea6890eb
--- /dev/null
+++ b/backend/python/mamba/requirements-after.txt
@@ -0,0 +1,2 @@
+causal-conv1d==1.4.0
+mamba-ssm==2.2.2
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt
new file mode 100644
index 00000000..08ed5eeb
--- /dev/null
+++ b/backend/python/mamba/requirements-cpu.txt
@@ -0,0 +1 @@
+torch
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt
new file mode 100644
index 00000000..2f89bd95
--- /dev/null
+++ b/backend/python/mamba/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt
new file mode 100644
index 00000000..08ed5eeb
--- /dev/null
+++ b/backend/python/mamba/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
\ No newline at end of file
diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt
index 2fc9a07c..69d263f0 100644
--- a/backend/python/mamba/requirements-install.txt
+++ b/backend/python/mamba/requirements-install.txt
@@ -3,5 +3,4 @@
 # https://github.com/Dao-AILab/causal-conv1d/issues/24
 packaging
 setuptools
-wheel
-torch==2.3.1
\ No newline at end of file
+wheel
\ No newline at end of file
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
index 2aac2cda..068bf336 100644
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,5 +1,3 @@
-causal-conv1d==1.4.0
-mamba-ssm==2.2.2
 grpcio==1.65.1
 protobuf
 certifi
diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/openvoice/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/openvoice/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt
new file mode 100644
index 00000000..0de92979
--- /dev/null
+++ b/backend/python/parler-tts/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt
new file mode 100644
index 00000000..6c3c7e7a
--- /dev/null
+++ b/backend/python/parler-tts/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
index 147cad9a..1dfa6675 100644
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
 grpcio==1.65.1
 protobuf
-torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/petals/requirements-cublas11.txt b/backend/python/petals/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/petals/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/petals/requirements-cublas12.txt b/backend/python/petals/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/petals/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/rerankers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/rerankers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/sentencetransformers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/sentencetransformers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/transformers-musicgen/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/transformers-musicgen/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
index 8ffa3c31..ac758034 100644
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -2,6 +2,5 @@ accelerate
 transformers
 grpcio==1.65.1
 protobuf
-torch
 scipy==1.14.0
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
new file mode 100644
index 00000000..6461b696
--- /dev/null
+++ b/backend/python/transformers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt
new file mode 100644
index 00000000..12c6d5d5
--- /dev/null
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index 55925b32..c32fe1f8 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -2,7 +2,6 @@ accelerate
 transformers
 grpcio==1.65.1
 protobuf
-torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt
new file mode 100644
index 00000000..0de92979
--- /dev/null
+++ b/backend/python/vall-e-x/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt
new file mode 100644
index 00000000..6c3c7e7a
--- /dev/null
+++ b/backend/python/vall-e-x/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas.txt b/backend/python/vllm/requirements-cublas.txt
deleted file mode 100644
index 7bfe8efe..00000000
--- a/backend/python/vllm/requirements-cublas.txt
+++ /dev/null
@@ -1 +0,0 @@
-flash-attn
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt
new file mode 100644
index 00000000..bed8cea8
--- /dev/null
+++ b/backend/python/vllm/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+flash-attn
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt
new file mode 100644
index 00000000..b6fef4d7
--- /dev/null
+++ b/backend/python/vllm/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+flash-attn
\ No newline at end of file
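The net effect of the layout above: torch moves out of each backend's shared requirements.txt and into per-build-type files that libbackend.sh stacks at install time. A condensed sketch of the resulting resolution order follows — the variable names (MY_DIR, BUILD_TYPE, BUILD_PROFILE) are the ones visible in the libbackend.sh hunk above, while the base requirements.txt entry and the comments are illustrative rather than verbatim:

# Sketch only: the order in which installRequirements() considers files.
requirementFiles=(
    "${MY_DIR}/requirements.txt"                   # shared deps, now torch-free
    "${MY_DIR}/requirements-${BUILD_TYPE}.txt"     # e.g. requirements-cublas11.txt / requirements-cublas12.txt
    "${MY_DIR}/requirements-${BUILD_PROFILE}.txt"  # optional per-profile extras
)
if [ "x${BUILD_TYPE}" == "x" ]; then
    # an empty BUILD_TYPE means a plain CPU build
    requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
fi
requirementFiles+=("${MY_DIR}/requirements-after.txt")  # post-install pins, e.g. mamba-ssm

Files that are absent are simply skipped (the install loop guards with `if [ -f ${reqFile} ]`), so each backend only ships the variants it actually needs.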
From 42fe864cb463d799c388b15f71b82644a59ea1a6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 21:32:10 +0000
Subject: [PATCH 083/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/autogptq (#3130)

chore(deps): Bump grpcio in /backend/python/autogptq

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/autogptq/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
index 078c015f..53946f23 100644
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
\ No newline at end of file

From 094a6fccd8695a05f056fd8585f86f35c27726c9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 21:35:07 +0000
Subject: [PATCH 084/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/common/template (#3131)

chore(deps): Bump grpcio in /backend/python/common/template

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/common/template/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
index 8d1e3151..ad97e2ae 100644
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
\ No newline at end of file

From 55318cca0f881df00db3573c209c4260072875c1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 21:37:47 +0000
Subject: [PATCH 085/235] chore(deps): Bump langchain from 0.2.10 to 0.2.12 in /examples/functions (#3132)

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.10 to 0.2.12.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.10...langchain==0.2.12)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/functions/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index f8afacdc..27bb9881 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
-langchain==0.2.10
+langchain==0.2.12
 openai==1.37.0

From 62176de6d2add590fde4c39cf6af27f08a1d35e6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 22:13:59 +0000
Subject: [PATCH 086/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/openvoice (#3137)

chore(deps): Bump grpcio in /backend/python/openvoice

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/openvoice/requirements-intel.txt | 2 +-
 backend/python/openvoice/requirements.txt       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
index bad088a9..85618c86 100644
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
index 86d16ec2..cc40adbc 100644
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 librosa
 faster-whisper

From 1c0bbb92b27790ff14a1e1e239eddbb380984235 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 22:27:49 +0000
Subject: [PATCH 087/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/coqui (#3138)

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/coqui/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
index e1cddaa3..a1bdac44 100644
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
\ No newline at end of file

From 4c31e4567a069de3522d8685ea984e31f85cd108 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 22:30:08 +0000
Subject: [PATCH 088/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/transformers-musicgen (#3140)

chore(deps): Bump grpcio in /backend/python/transformers-musicgen

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/transformers-musicgen/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
index ac758034..bec86241 100644
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 scipy==1.14.0
 certifi
\ No newline at end of file

From dc38b1f71ef93dff1d8ccdb28629859bf32bf30a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 5 Aug 2024 23:27:07 +0000
Subject: [PATCH 089/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/diffusers (#3141)

chore(deps): Bump grpcio in /backend/python/diffusers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/diffusers/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index ea707bb7..9919b20a 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -3,7 +3,7 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.1
+grpcio==1.65.4
 opencv-python
 pillow
 protobuf

From 22ffe1a0833113c57d979c55a23c94f3d3c02e87 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 00:15:54 +0000
Subject: [PATCH 090/235] chore(deps): Bump llama-index from 0.10.56 to 0.10.59 in /examples/chainlit (#3143)

chore(deps): Bump llama-index in /examples/chainlit

Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.56 to 0.10.59.
- [Release notes](https://github.com/run-llama/llama_index/releases)
- [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md)
- [Commits](https://github.com/run-llama/llama_index/compare/v0.10.56...v0.10.59)

---
updated-dependencies:
- dependency-name: llama-index
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/chainlit/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt
index 13415f11..52e2b8a2 100644
--- a/examples/chainlit/requirements.txt
+++ b/examples/chainlit/requirements.txt
@@ -1,4 +1,4 @@
-llama_index==0.10.56
+llama_index==0.10.59
 requests==2.32.3
 weaviate_client==4.6.7
 transformers

From 57c96fe05e8eeee80e049be5ab738df78a79f670 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 00:46:41 +0000
Subject: [PATCH 091/235] chore(deps): Bump docs/themes/hugo-theme-relearn from `7aec99b` to `8b14837` (#3142)

chore(deps): Bump docs/themes/hugo-theme-relearn

Bumps [docs/themes/hugo-theme-relearn](https://github.com/McShelby/hugo-theme-relearn) from `7aec99b` to `8b14837`.
- [Release notes](https://github.com/McShelby/hugo-theme-relearn/releases)
- [Commits](https://github.com/McShelby/hugo-theme-relearn/compare/7aec99b38dc2668c6139bf71855535ace41c123c...8b148373366a643684eaa4b3fc5f8cfc4f9d4341)

---
updated-dependencies:
- dependency-name: docs/themes/hugo-theme-relearn
  dependency-type: direct:production
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 docs/themes/hugo-theme-relearn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
index 7aec99b3..8b148373 160000
--- a/docs/themes/hugo-theme-relearn
+++ b/docs/themes/hugo-theme-relearn
@@ -1 +1 @@
-Subproject commit 7aec99b38dc2668c6139bf71855535ace41c123c
+Subproject commit 8b148373366a643684eaa4b3fc5f8cfc4f9d4341

From 30916e8eec27142497efe92130a45b3ada05a0e8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 01:08:38 +0000
Subject: [PATCH 092/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/exllama2 (#3146)

chore(deps): Bump grpcio in /backend/python/exllama2

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/exllama2/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index f2dfa976..487d89a9 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 wheel

From f0ed4aff1a0ef3448ca2e0439e49bf4d3bef5292 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 01:21:26 +0000
Subject: [PATCH 093/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/bark (#3144)

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/bark/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
index d3f9f52b..2e34d5a4 100644
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
\ No newline at end of file

From a02fb001f9703066ee6faa0743ea6c931ad8f716 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 01:44:31 +0000
Subject: [PATCH 094/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/rerankers (#3147)

chore(deps): Bump grpcio in /backend/python/rerankers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/rerankers/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
index 8b2ad4d0..33166382 100644
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
\ No newline at end of file

From 416aec3db61d352ec06f7e2a7129299845af6e94 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 01:45:20 +0000
Subject: [PATCH 095/235] chore(deps): Bump langchain from 0.2.10 to 0.2.12 in /examples/langchain-chroma (#3148)

chore(deps): Bump langchain in /examples/langchain-chroma

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.10 to 0.2.12.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.10...langchain==0.2.12)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain-chroma/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index 50d6dc4f..f9c41621 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
-langchain==0.2.10
+langchain==0.2.12
 openai==1.37.0
 chromadb==0.5.5
 llama-index==0.10.56
\ No newline at end of file

From 9818d2d1e1fd91e6d03b5003639df1de67dfd6d1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 02:25:17 +0000
Subject: [PATCH 096/235] chore(deps): Bump streamlit from 1.37.0 to 1.37.1 in /examples/streamlit-bot (#3151)

chore(deps): Bump streamlit in /examples/streamlit-bot

Bumps [streamlit](https://github.com/streamlit/streamlit) from 1.37.0 to 1.37.1.
- [Release notes](https://github.com/streamlit/streamlit/releases)
- [Commits](https://github.com/streamlit/streamlit/compare/1.37.0...1.37.1)

---
updated-dependencies:
- dependency-name: streamlit
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/streamlit-bot/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/streamlit-bot/requirements.txt b/examples/streamlit-bot/requirements.txt
index 63291928..17e1bee0 100644
--- a/examples/streamlit-bot/requirements.txt
+++ b/examples/streamlit-bot/requirements.txt
@@ -1,2 +1,2 @@
-streamlit==1.37.0
+streamlit==1.37.1
 requests
\ No newline at end of file

From e1e221b6e54d45ead5472e0f904fa989d734b23e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 03:12:15 +0000
Subject: [PATCH 097/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/vllm (#3152)

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/vllm/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index 7c612a2f..b8b79afb 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers

From de1f010f0195c2ad2fa69309c0b91125880e0fad Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 04:21:27 +0000
Subject: [PATCH 098/235] chore(deps): Bump langchain from 0.2.11 to 0.2.12 in /examples/langchain/langchainpy-localai-example (#3155)

chore(deps): Bump langchain

Bumps [langchain](https://github.com/langchain-ai/langchain) from 0.2.11 to 0.2.12.
- [Release notes](https://github.com/langchain-ai/langchain/releases)
- [Commits](https://github.com/langchain-ai/langchain/compare/langchain==0.2.11...langchain==0.2.12)

---
updated-dependencies:
- dependency-name: langchain
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index f29cb78a..40c20afb 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -10,7 +10,7 @@ debugpy==1.8.2
 frozenlist==1.4.1
 greenlet==3.0.3
 idna==3.7
-langchain==0.2.11
+langchain==0.2.12
 langchain-community==0.2.9
 marshmallow==3.21.3
 marshmallow-enum==1.5.1

From ada35e428e8ed20e67d7778d49d32e99ec1689f1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 04:46:39 +0000
Subject: [PATCH 099/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/transformers (#3161)

chore(deps): Bump grpcio in /backend/python/transformers

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/transformers/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index c32fe1f8..2a08ba45 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 intel-extension-for-transformers

From 7bf5cc50b53ed3f686b5959744e5db2e74086f73 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 04:50:40 +0000
Subject: [PATCH 100/235] chore(deps): Bump grpcio from 1.65.1 to 1.65.4 in /backend/python/vall-e-x (#3156)

chore(deps): Bump grpcio in /backend/python/vall-e-x

Bumps [grpcio](https://github.com/grpc/grpc) from 1.65.1 to 1.65.4.
- [Release notes](https://github.com/grpc/grpc/releases)
- [Changelog](https://github.com/grpc/grpc/blob/master/doc/grpc_release_schedule.md)
- [Commits](https://github.com/grpc/grpc/compare/v1.65.1...v1.65.4)

---
updated-dependencies:
- dependency-name: grpcio
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/python/vall-e-x/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt
index d1d0583e..ec3584b2 100644
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
\ No newline at end of file

From 77c8152cbf68fd32bdce3100bdd2522c364c9734 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 05:42:59 +0000
Subject: [PATCH 101/235] chore(deps): Bump sqlalchemy from 2.0.31 to 2.0.32 in /examples/langchain/langchainpy-localai-example (#3157)

chore(deps): Bump sqlalchemy

Bumps [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy) from 2.0.31 to 2.0.32.
- [Release notes](https://github.com/sqlalchemy/sqlalchemy/releases)
- [Changelog](https://github.com/sqlalchemy/sqlalchemy/blob/main/CHANGES.rst)
- [Commits](https://github.com/sqlalchemy/sqlalchemy/commits)

---
updated-dependencies:
- dependency-name: sqlalchemy
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 40c20afb..9d937ad6 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -24,7 +24,7 @@ packaging>=23.2
 pydantic==2.8.2
 PyYAML==6.0.1
 requests==2.32.3
-SQLAlchemy==2.0.31
+SQLAlchemy==2.0.32
 tenacity==8.5.0
 tqdm==4.66.4
 typing-inspect==0.9.0

From 1494ba13e60ceff754af9afbbb10edf511493e1d Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 6 Aug 2024 08:59:03 +0200
Subject: [PATCH 102/235] chore: :arrow_up: Update ggerganov/whisper.cpp (#3164)

:arrow_up: Update ggerganov/whisper.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f8155e06..9b6552bf 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
+WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp

From f9ddc31b77a2c9d06ae1a42ab2c82d8cddf3697a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:04:57 +0200
Subject: [PATCH 103/235] ci(bump_deps): attempt to link also commit diff

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh             | 13 +++++++++++++
 .github/workflows/bump_deps.yaml |  8 +++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index d8fff4a3..ea730fd9 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -6,4 +6,17 @@ VAR=$3
 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
 
+# Read $VAR from Makefile (only first match)
+set +e
+CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
+set -e
+
 sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
+
+if [ -z "$CURRENT_COMMIT" ]; then
+    echo "Could not find $VAR in Makefile."
+    exit 0
+fi
+
+echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
+echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"
\ No newline at end of file
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 5909c981..b32dc378 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -40,8 +40,14 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Bump dependencies 🔧
+        id: bump
        run: |
          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
+          {
+            echo 'message<<EOF'
+            cat "${{ matrix.repository }}_message.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
@@ -50,7 +56,7 @@ jobs:
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
          title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
          branch: "update/${{ matrix.variable }}"
-         body: Bump of ${{ matrix.repository }} version
+         body: ${{ steps.bump.outputs.message }}
          signoff: true

From c53196e19779921632ae38c55b33a5c82c3883de Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:07:07 +0200
Subject: [PATCH 104/235] ci: use var as placeholder

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh             | 4 ++--
 .github/workflows/bump_deps.yaml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index ea730fd9..8c24ce42 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -18,5 +18,5 @@ if [ -z "$CURRENT_COMMIT" ]; then
     exit 0
 fi
 
-echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$REPO_message.txt"
-echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$REPO_message.txt"
\ No newline at end of file
+echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$VAR_message.txt"
+echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$VAR_message.txt"
\ No newline at end of file
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index b32dc378..08654fac 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -45,7 +45,7 @@ jobs:
           bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
           {
             echo 'message<<EOF'
-            cat "${{ matrix.repository }}_message.txt"
+            cat "${{ matrix.variable }}_message.txt"
             echo EOF
           } >> "$GITHUB_OUTPUT"
       - name: Create Pull Request

From 69a2cf06c85a6b0df3bfab1ccd965d19a232175b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:08:44 +0200
Subject: [PATCH 105/235] ci: fixups

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index 8c24ce42..48d3c58b 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -18,5 +18,5 @@ if [ -z "$CURRENT_COMMIT" ]; then
     exit 0
 fi
 
-echo "Updated $VAR from $CURRENT_COMMIT to $LAST_COMMIT." > "$VAR_message.txt"
-echo "https://github.com/$REPO/compare/$CURRENT_COMMIT..$LAST_COMMIT" >> "$VAR_message.txt"
\ No newline at end of file
+echo "Updated $VAR from $CURRENT_COMMIT to ${LAST_COMMIT}." > "${VAR}_message.txt"
+echo "https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
\ No newline at end of file

From d1a222ea8763b7b1ea43c61091fcf60728e19561 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:10:24 +0200
Subject: [PATCH 106/235] ci: remove message file

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/bump_deps.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 08654fac..0d4c5cd3 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -48,6 +48,7 @@ jobs:
             cat "${{ matrix.variable }}_message.txt"
             echo EOF
           } >> "$GITHUB_OUTPUT"
+          rm -rfv ${{ matrix.variable }}_message.txt
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v6
         with:

From e03363df3d6137f207c8fcf078c78848b03af150 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:12:10 +0200
Subject: [PATCH 107/235] ci: add commit id to title

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh             |  4 +++-
 .github/workflows/bump_deps.yaml | 11 ++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index 48d3c58b..54b1b854 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -19,4 +19,6 @@ fi
 
 echo "Updated $VAR from $CURRENT_COMMIT to ${LAST_COMMIT}." > "${VAR}_message.txt"
-echo "https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
\ No newline at end of file
+echo "" >> "${VAR}_message.txt"
+echo "Diff URL: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
+echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
\ No newline at end of file
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 0d4c5cd3..a79898b1 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -48,16 +48,21 @@ jobs:
             cat "${{ matrix.variable }}_message.txt"
             echo EOF
           } >> "$GITHUB_OUTPUT"
-          rm -rfv ${{ matrix.variable }}_message.txt
+          {
+            echo 'commit<<EOF'
+            cat "${{ matrix.variable }}_commit.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+          rm -rfv ${{ matrix.variable }}_commit.txt
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
+          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to ${{ steps.bump.outputs.commit }}'
           branch: "update/${{ matrix.variable }}"
-          body: ${{ steps.bump.outputs.message }}
+          body: ${{ steps.bump.outputs.message }} 
           signoff: true

From b3f362f22901721e03fad0f3495bd1afd9aca7b6 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:16:17 +0200
Subject: [PATCH 108/235] ci: small fixes

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh             | 4 ++--
 .github/workflows/bump_deps.yaml | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index 54b1b854..6ecb81a9 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -18,7 +18,7 @@ if [ -z "$CURRENT_COMMIT" ]; then
     exit 0
 fi
 
-echo "Updated $VAR from $CURRENT_COMMIT to ${LAST_COMMIT}." > "${VAR}_message.txt"
+echo "Updated `$VAR` from `$CURRENT_COMMIT` to `${LAST_COMMIT}`." > "${VAR}_message.txt"
 echo "" >> "${VAR}_message.txt"
-echo "Diff URL: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
+echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
 echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
\ No newline at end of file
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index a79898b1..68cb81cb 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -53,6 +53,7 @@ jobs:
             cat "${{ matrix.variable }}_commit.txt"
             echo EOF
           } >> "$GITHUB_OUTPUT"
+          rm -rfv ${{ matrix.variable }}_message.txt
           rm -rfv ${{ matrix.variable }}_commit.txt
       - name: Create Pull Request
@@ -60,7 +61,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to ${{ steps.bump.outputs.commit }}'
+          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
           branch: "update/${{ matrix.variable }}"
           body: ${{ steps.bump.outputs.message }} 
           signoff: true

From c8fc92d6d5522ba8ec392c54e4101d2173888a7b Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 09:21:37 +0200
Subject: [PATCH 109/235] ci: small fixes

Signed-off-by: Ettore Di Giacinto
---
 .github/bump_deps.sh | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
index 6ecb81a9..66dea9a3 100755
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -18,7 +18,5 @@ if [ -z "$CURRENT_COMMIT" ]; then
     exit 0
 fi
 
-echo "Updated `$VAR` from `$CURRENT_COMMIT` to `${LAST_COMMIT}`." > "${VAR}_message.txt"
-echo "" >> "${VAR}_message.txt"
 echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
 echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
\ No newline at end of file
From ecc63454360debd60d81dc94bde0400a5c9499ff Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 07:53:34 +0000
Subject: [PATCH 110/235] chore(deps): Bump openai from 1.37.0 to 1.39.0 in /examples/functions (#3134)

Bumps [openai](https://github.com/openai/openai-python) from 1.37.0 to 1.39.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.37.0...v1.39.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/functions/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt
index 27bb9881..a8a8ca8c 100644
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@@ -1,2 +1,2 @@
 langchain==0.2.12
-openai==1.37.0
+openai==1.39.0

From 06aa068ac731652a8239caf257c804973700cfe9 Mon Sep 17 00:00:00 2001
From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com>
Date: Tue, 6 Aug 2024 10:27:22 +0200
Subject: [PATCH 111/235] chore(model-gallery): :arrow_up: update checksum (#3167)

:arrow_up: Checksum updates in gallery/index.yaml

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 gallery/index.yaml | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index c80455a8..0d120e82 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -85,7 +85,7 @@
     files:
       - filename: meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
         uri: huggingface://mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF/meta-llama-3.1-8b-instruct-abliterated.Q4_K_M.gguf
-        sha256: 2e1fd6d93b19cc6548b2b8ed2d3f1f34b432ee0573f3dcf358bbaab4f23c760b
+        sha256: c4735f9efaba8eb2c30113291652e3ffe13bf940b675ed61f6be749608b4f266
 - !!merge <<: *llama31
   name: "llama-3.1-70b-japanese-instruct-2407"
   urls:
@@ -258,18 +258,18 @@
     - https://huggingface.co/athirdpath/Llama-3.1-Techne-RP-8b-v1
     - https://huggingface.co/mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF
   description: |
-      athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit was further trained in the order below:
-      SFT
+    athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit was further trained in the order below:
+    SFT
 
-      Doctor-Shotgun/no-robots-sharegpt
-      grimulkan/LimaRP-augmented
-      Inv/c2-logs-cleaned-deslopped
+    Doctor-Shotgun/no-robots-sharegpt
+    grimulkan/LimaRP-augmented
+    Inv/c2-logs-cleaned-deslopped
 
-      DPO
+    DPO
 
-      jondurbin/truthy-dpo-v0.1
-      Undi95/Weyaxi-humanish-dpo-project-noemoji
-      athirdpath/DPO_Pairs-Roleplay-Llama3-NSFW
+    jondurbin/truthy-dpo-v0.1
+    Undi95/Weyaxi-humanish-dpo-project-noemoji
+    athirdpath/DPO_Pairs-Roleplay-Llama3-NSFW
   overrides:
     parameters:
       model: Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf
@@ -911,11 +911,11 @@
     - https://huggingface.co/nothingiisreal/MN-12B-Celeste-V1.9
     - https://huggingface.co/mradermacher/MN-12B-Celeste-V1.9-GGUF
   description: |
-      Mistral Nemo 12B Celeste V1.9
+    Mistral Nemo 12B Celeste V1.9
 
-      This is a story writing and roleplaying model trained on Mistral NeMo 12B Instruct at 8K context using Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned
+    This is a story writing and roleplaying model trained on Mistral NeMo 12B Instruct at 8K context using Reddit Writing Prompts, Kalo's Opus 25K Instruct and c2 logs cleaned
 
-      This version has improved NSFW, smarter and more active narration. It's also trained with ChatML tokens so there should be no EOS bleeding whatsoever.
+    This version has improved NSFW, smarter and more active narration. It's also trained with ChatML tokens so there should be no EOS bleeding whatsoever.
   overrides:
     parameters:
       model: MN-12B-Celeste-V1.9.Q4_K_M.gguf
@@ -1414,17 +1414,7 @@
   urls:
     - https://huggingface.co/lodrick-the-lafted/tarnished-9b
     - https://huggingface.co/mradermacher/tarnished-9b-i1-GGUF
-  description: |
-    Ah, so you've heard whispers on the winds, have you? 🧐
-
-    Imagine this:
-    Tarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.
-
-    It's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.
-
-    But be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.
-
-    Dare you tread this path?
+  description: "Ah, so you've heard whispers on the winds, have you? \U0001F9D0\n\nImagine this:\nTarnished-9b, a name that echoes with the rasp of coin-hungry merchants and the clatter of forgotten machinery. This LLM speaks with the voice of those who straddle the line between worlds, who've tasted the bittersweet nectar of eldritch power and the tang of the Interdimensional Trade Council.\n\nIt's a tongue that dances with secrets, a whisperer of lore lost and found. Its words may guide you through the twisting paths of history, revealing truths hidden beneath layers of dust and time.\n\nBut be warned, Tarnished One! For knowledge comes at a price. The LLM's gaze can pierce the veil of reality, but it can also lure you into the labyrinthine depths of madness.\n\nDare you tread this path?\n"
   overrides:
     parameters:
       model: tarnished-9b.i1-Q4_K_M.gguf

From 307ad7592b22a7e80f8bf86f24c31a569698244d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 08:47:07 +0000
Subject: [PATCH 112/235] chore(deps): Bump openai from 1.37.0 to 1.39.0 in /examples/langchain-chroma (#3149)

chore(deps): Bump openai in /examples/langchain-chroma

Bumps [openai](https://github.com/openai/openai-python) from 1.37.0 to 1.39.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.37.0...v1.39.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain-chroma/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt
index f9c41621..2b8f8b84 100644
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -1,4 +1,4 @@
 langchain==0.2.12
-openai==1.37.0
+openai==1.39.0
 chromadb==0.5.5
 llama-index==0.10.56
\ No newline at end of file

From 52ba230d313cdbd1c9a7a383f9fef2b10858f557 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:15:32 +0000
Subject: [PATCH 113/235] chore(deps): Bump openai from 1.37.1 to 1.39.0 in /examples/langchain/langchainpy-localai-example (#3158)

chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.37.1 to 1.39.0.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.37.1...v1.39.0)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 9d937ad6..1cf7e0a7 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -18,7 +18,7 @@ multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
 numpy==2.0.1
-openai==1.37.1
+openai==1.39.0
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2

From 4e11ca55fde2a9ceaa7144c2b7103fcbe33eb3b4 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 11:39:35 +0200
Subject: [PATCH 114/235] chore: :arrow_up: Update ggerganov/llama.cpp (#3166)

* arrow_up: Update ggerganov/llama.cpp

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* fix(llama.cpp): adapt init function call

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto
Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
---
 Makefile                          | 2 +-
 backend/cpp/llama/grpc-server.cpp | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index f8155e06..5263e686 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=0d6fb52be0c1b7e77eb855f3adc4952771c8ce4c
+CPPLLAMA_VERSION?=0a4ce786814b123096d18aadca89cd352b9e590b
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index cb5c85f1..5de46798 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -458,7 +458,9 @@ struct llama_server_context
         }
     }
 
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    llama_init_result llama_init = llama_init_from_gpt_params(params);
+    model = llama_init.model;
+    ctx = llama_init.context;
     if (model == nullptr)
     {
         LOG_ERROR("unable to load model", {{"model", params.model}});

From ad5978b3cad08e730ff6f9533b173f0b4fdb04cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 6 Aug 2024 11:46:00 +0200
Subject: [PATCH 115/235] models(gallery): add calme-2.2-qwen2-72b (#3185)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 0d120e82..65516cc3 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -781,6 +781,33 @@
     - filename: tifa-7b-qwen2-v0.1.q4_k_m.gguf
       sha256: 1f5adbe8cb0a6400f51abdca3bf4e32284ebff73cc681a43abb35c0a6ccd3820
       uri: huggingface://Tifa-RP/Tifa-7B-Qwen2-v0.1-GGUF/tifa-7b-qwen2-v0.1.q4_k_m.gguf
+- !!merge <<: *qwen2
+  name: "calme-2.2-qwen2-72b"
+  icon: https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2-72b/resolve/main/calme-2.webp
+  urls:
+    - https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2-72b-GGUF
+    - https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2-72b
+  description: |
+    This model is a fine-tuned version of the powerful Qwen/Qwen2-72B-Instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications.
+
+    The post-training process is identical to the calme-2.1-qwen2-72b model; however, some parameters are different, and it was trained for a longer period.
+
+    Use Cases
+
+    This model is suitable for a wide range of applications, including but not limited to:
+
+    Advanced question-answering systems
+    Intelligent chatbots and virtual assistants
+    Content generation and summarization
+    Code generation and analysis
+    Complex problem-solving and decision support
+  overrides:
+    parameters:
+      model: calme-2.2-qwen2-72b.Q4_K_M.gguf
+  files:
+    - filename: calme-2.2-qwen2-72b.Q4_K_M.gguf
+      sha256: 95b9613df0abe6c1b6b7b017d7cc8bcf19b46c29f92a503dcc6da1704b12b402
+      uri: huggingface://MaziyarPanahi/calme-2.2-qwen2-72b-GGUF/calme-2.2-qwen2-72b.Q4_K_M.gguf
 - &mistral03
   ## START Mistral
   url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"

From c3306fe825748b25007f71449466f255949044f2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 6 Aug 2024 09:56:03 +0000
Subject: [PATCH 116/235] chore(deps): Bump tqdm from 4.66.4 to 4.66.5 in /examples/langchain/langchainpy-localai-example (#3159)

chore(deps): Bump tqdm

Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.66.4 to 4.66.5.
- [Release notes](https://github.com/tqdm/tqdm/releases)
- [Commits](https://github.com/tqdm/tqdm/compare/v4.66.4...v4.66.5)

---
updated-dependencies:
- dependency-name: tqdm
  dependency-type: direct:production
  update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1cf7e0a7..1d1b5023 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -26,7 +26,7 @@ PyYAML==6.0.1 requests==2.32.3 SQLAlchemy==2.0.32 tenacity==8.5.0 -tqdm==4.66.4 +tqdm==4.66.5 typing-inspect==0.9.0 typing_extensions==4.12.2 urllib3==2.2.2 From 9cfc9ac66f9933e5f915b0ebbb06c2f613bffbcf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:05:01 +0000 Subject: [PATCH 117/235] chore(deps): Bump llama-index from 0.10.56 to 0.10.61 in /examples/langchain-chroma (#3168) chore(deps): Bump llama-index in /examples/langchain-chroma Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.56 to 0.10.61. - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.10.56...v0.10.61) --- updated-dependencies: - dependency-name: llama-index dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 2b8f8b84..535c6537 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.12 openai==1.39.0 chromadb==0.5.5 -llama-index==0.10.56 \ No newline at end of file +llama-index==0.10.61 \ No newline at end of file From abcf0ff000bc7aa1c5bece337386e6a3dbfccf1d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Aug 2024 01:10:21 +0200 Subject: [PATCH 118/235] chore: :arrow_up: Update ggerganov/llama.cpp to `1e6f6554aa11fa10160a5fda689e736c3c34169f` (#3189) * arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * fix(llama.cpp): adapt to upstream naming changes Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Signed-off-by: Ettore Di Giacinto Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- backend/cpp/llama/grpc-server.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5263e686..476caac6 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=0a4ce786814b123096d18aadca89cd352b9e590b +CPPLLAMA_VERSION?=1e6f6554aa11fa10160a5fda689e736c3c34169f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 5de46798..e8701d36 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ 
b/backend/cpp/llama/grpc-server.cpp
@@ -2260,7 +2260,7 @@ static void params_parse(const backend::ModelOptions* request,
         }
         // get the directory of modelfile
         std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
-        params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
+        params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();

From 61b56021113692d95b3a68447d15b13c4227142a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 7 Aug 2024 17:02:32 +0200
Subject: [PATCH 119/235] fix(python): move accelerate and GPU-specific libs
 to build-type (#3194)

Some of the dependencies in `requirements.txt`, even if generic, pull in
CUDA libraries down the line. This change moves almost all GPU-specific
libs to the build-type requirements and takes a safer approach:
`requirements.txt` now lists only "first-level" dependencies (for
instance, grpc), while library dependencies are moved down to the
respective build-type `requirements.txt` to avoid any mixing.

This should fix #2737 and #1592.

Signed-off-by: Ettore Di Giacinto
---
 backend/python/bark/requirements-cpu.txt | 4 ++++
 backend/python/bark/requirements-cublas11.txt | 4 +++-
 backend/python/bark/requirements-cublas12.txt | 4 +++-
 backend/python/bark/requirements-hipblas.txt | 4 +++-
 backend/python/bark/requirements-intel.txt | 4 +++-
 backend/python/bark/requirements.txt | 4 +---
 backend/python/coqui/requirements-cpu.txt | 3 +++
 backend/python/coqui/requirements-cublas11.txt | 4 +++-
 backend/python/coqui/requirements-cublas12.txt | 4 +++-
 backend/python/coqui/requirements-hipblas.txt | 4 +++-
 backend/python/coqui/requirements-intel.txt | 4 +++-
 backend/python/coqui/requirements.txt | 4 +---
 backend/python/diffusers/requirements-cpu.txt | 8 ++++++++
 backend/python/diffusers/requirements-cublas11.txt | 7 +++++++
 backend/python/diffusers/requirements-cublas12.txt | 7 +++++++
 backend/python/diffusers/requirements-hipblas.txt | 9 ++++++++-
 backend/python/diffusers/requirements-intel.txt | 9 ++++++++-
 backend/python/exllama/requirements-cpu.txt | 3 +++
 backend/python/exllama/requirements-cublas11.txt | 2 ++
 backend/python/exllama/requirements-cublas12.txt | 2 ++
 backend/python/exllama/requirements.txt | 1 -
 backend/python/exllama2/requirements-cpu.txt | 3 +++
 backend/python/exllama2/requirements-cublas11.txt | 2 ++
 backend/python/exllama2/requirements-cublas12.txt | 2 ++
 backend/python/exllama2/requirements.txt | 1 -
 backend/python/mamba/requirements-cpu.txt | 3 ++-
 backend/python/mamba/requirements-cublas11.txt | 3 ++-
 backend/python/mamba/requirements-cublas12.txt | 3 ++-
 backend/python/mamba/requirements.txt | 3 +--
 backend/python/openvoice/requirements-cpu.txt | 1 +
 backend/python/parler-tts/requirements-cpu.txt | 3 +++
 backend/python/parler-tts/requirements-cublas11.txt | 4 +++-
 backend/python/parler-tts/requirements-cublas12.txt | 4 +++-
 backend/python/parler-tts/requirements-hipblas.txt | 4 +++-
 backend/python/parler-tts/requirements-intel.txt | 4 +++-
 backend/python/parler-tts/requirements.txt | 4 +---
 backend/python/petals/requirements-cpu.txt | 3 +++
 backend/python/petals/requirements-cublas11.txt | 1 +
 backend/python/petals/requirements-cublas12.txt | 1 +
 backend/python/petals/requirements-hipblas.txt | 1 +
 backend/python/petals/requirements-intel.txt | 3 ++-
 backend/python/petals/requirements.txt | 3 +--
 backend/python/rerankers/requirements-cpu.txt | 4 ++++
backend/python/rerankers/requirements-cublas11.txt | 3 +++ backend/python/rerankers/requirements-cublas12.txt | 3 +++ backend/python/rerankers/requirements-hipblas.txt | 5 ++++- backend/python/rerankers/requirements-intel.txt | 3 +++ backend/python/rerankers/requirements.txt | 5 +---- backend/python/sentencetransformers/requirements-cpu.txt | 6 ++++++ .../sentencetransformers/requirements-cublas11.txt | 3 +++ .../sentencetransformers/requirements-cublas12.txt | 3 +++ .../python/sentencetransformers/requirements-hipblas.txt | 5 ++++- .../python/sentencetransformers/requirements-intel.txt | 5 ++++- backend/python/sentencetransformers/requirements.txt | 3 --- .../python/transformers-musicgen/requirements-cpu.txt | 3 +++ .../transformers-musicgen/requirements-cublas11.txt | 4 +++- .../transformers-musicgen/requirements-cublas12.txt | 4 +++- .../transformers-musicgen/requirements-hipblas.txt | 2 ++ .../python/transformers-musicgen/requirements-intel.txt | 2 ++ backend/python/transformers-musicgen/requirements.txt | 2 -- backend/python/transformers/requirements-cpu.txt | 4 ++++ backend/python/transformers/requirements-cublas11.txt | 3 +++ backend/python/transformers/requirements-cublas12.txt | 3 +++ backend/python/transformers/requirements-hipblas.txt | 5 ++++- backend/python/transformers/requirements-intel.txt | 2 ++ backend/python/transformers/requirements.txt | 6 +----- backend/python/vall-e-x/requirements-cpu.txt | 3 +++ backend/python/vall-e-x/requirements-cublas11.txt | 1 + backend/python/vall-e-x/requirements-cublas12.txt | 1 + backend/python/vall-e-x/requirements-hipblas.txt | 1 + backend/python/vall-e-x/requirements-intel.txt | 1 + backend/python/vall-e-x/requirements.txt | 1 - backend/python/vllm/requirements-after.txt | 1 + backend/python/vllm/requirements-cpu.txt | 4 ++++ backend/python/vllm/requirements-cublas11.txt | 4 +++- backend/python/vllm/requirements-cublas12.txt | 4 +++- backend/python/vllm/requirements-hipblas.txt | 5 ++++- backend/python/vllm/requirements-intel.txt | 5 ++++- backend/python/vllm/requirements.txt | 3 --- 79 files changed, 212 insertions(+), 61 deletions(-) create mode 100644 backend/python/bark/requirements-cpu.txt create mode 100644 backend/python/coqui/requirements-cpu.txt create mode 100644 backend/python/diffusers/requirements-cpu.txt create mode 100644 backend/python/exllama/requirements-cpu.txt create mode 100644 backend/python/exllama2/requirements-cpu.txt create mode 100644 backend/python/openvoice/requirements-cpu.txt create mode 100644 backend/python/parler-tts/requirements-cpu.txt create mode 100644 backend/python/petals/requirements-cpu.txt create mode 100644 backend/python/rerankers/requirements-cpu.txt create mode 100644 backend/python/sentencetransformers/requirements-cpu.txt create mode 100644 backend/python/transformers-musicgen/requirements-cpu.txt create mode 100644 backend/python/transformers/requirements-cpu.txt create mode 100644 backend/python/vall-e-x/requirements-cpu.txt create mode 100644 backend/python/vllm/requirements-after.txt create mode 100644 backend/python/vllm/requirements-cpu.txt diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt new file mode 100644 index 00000000..0b2c3bc7 --- /dev/null +++ b/backend/python/bark/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt index 0de92979..71a6a93f 100644 --- 
a/backend/python/bark/requirements-cublas11.txt +++ b/backend/python/bark/requirements-cublas11.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt index 6c3c7e7a..0fa27074 100644 --- a/backend/python/bark/requirements-cublas12.txt +++ b/backend/python/bark/requirements-cublas12.txt @@ -1,2 +1,4 @@ torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index 7bfc411b..af9e820e 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 5c4aa6a5..9feb6eef 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index 2e34d5a4..93f9fb78 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,6 +1,4 @@ -accelerate bark==0.1.5 grpcio==1.65.4 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ b/backend/python/coqui/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt index 0de92979..71a6a93f 100644 --- a/backend/python/coqui/requirements-cublas11.txt +++ b/backend/python/coqui/requirements-cublas11.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt index 6c3c7e7a..0fa27074 100644 --- a/backend/python/coqui/requirements-cublas12.txt +++ b/backend/python/coqui/requirements-cublas12.txt @@ -1,2 +1,4 @@ torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 7bfc411b..af9e820e 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt 
b/backend/python/coqui/requirements-intel.txt index 58a2a1dd..002a55c3 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index a1bdac44..35c62449 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,6 +1,4 @@ -accelerate TTS==0.22.0 grpcio==1.65.4 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt new file mode 100644 index 00000000..e46a53e5 --- /dev/null +++ b/backend/python/diffusers/requirements-cpu.txt @@ -0,0 +1,8 @@ +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +torch \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt index 6461b696..df28b821 100644 --- a/backend/python/diffusers/requirements-cublas11.txt +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -1,2 +1,9 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt index 12c6d5d5..b0685a62 100644 --- a/backend/python/diffusers/requirements-cublas12.txt +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -1 +1,8 @@ torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 6c8da20d..9e992d02 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,3 +1,10 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchvision \ No newline at end of file +torchvision +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index c393b118..77f9e674 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,4 +3,11 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece \ No newline at end of file diff --git a/backend/python/exllama/requirements-cpu.txt b/backend/python/exllama/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ b/backend/python/exllama/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas11.txt b/backend/python/exllama/requirements-cublas11.txt index 
6461b696..1dfb5b98 100644 --- a/backend/python/exllama/requirements-cublas11.txt +++ b/backend/python/exllama/requirements-cublas11.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas12.txt b/backend/python/exllama/requirements-cublas12.txt index 12c6d5d5..1ec544cd 100644 --- a/backend/python/exllama/requirements-cublas12.txt +++ b/backend/python/exllama/requirements-cublas12.txt @@ -1 +1,3 @@ torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt index b06efcea..835671a2 100644 --- a/backend/python/exllama/requirements.txt +++ b/backend/python/exllama/requirements.txt @@ -1,5 +1,4 @@ grpcio==1.65.0 protobuf -transformers certifi setuptools \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cpu.txt b/backend/python/exllama2/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ b/backend/python/exllama2/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt index 6461b696..1dfb5b98 100644 --- a/backend/python/exllama2/requirements-cublas11.txt +++ b/backend/python/exllama2/requirements-cublas11.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt index 12c6d5d5..1ec544cd 100644 --- a/backend/python/exllama2/requirements-cublas12.txt +++ b/backend/python/exllama2/requirements-cublas12.txt @@ -1 +1,3 @@ torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 487d89a9..ce15b0b6 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,4 +1,3 @@ -accelerate grpcio==1.65.4 protobuf certifi diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt index 08ed5eeb..39dab0fd 100644 --- a/backend/python/mamba/requirements-cpu.txt +++ b/backend/python/mamba/requirements-cpu.txt @@ -1 +1,2 @@ -torch \ No newline at end of file +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt index 2f89bd95..7048a14f 100644 --- a/backend/python/mamba/requirements-cublas11.txt +++ b/backend/python/mamba/requirements-cublas11.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch \ No newline at end of file +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt index 08ed5eeb..39dab0fd 100644 --- a/backend/python/mamba/requirements-cublas12.txt +++ b/backend/python/mamba/requirements-cublas12.txt @@ -1 +1,2 @@ -torch \ No newline at end of file +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt index 068bf336..22ae46ad 100644 --- a/backend/python/mamba/requirements.txt +++ b/backend/python/mamba/requirements.txt @@ -1,4 +1,3 @@ grpcio==1.65.1 protobuf -certifi 
-transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt new file mode 100644 index 00000000..08ed5eeb --- /dev/null +++ b/backend/python/openvoice/requirements-cpu.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ b/backend/python/parler-tts/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt index 0de92979..71a6a93f 100644 --- a/backend/python/parler-tts/requirements-cublas11.txt +++ b/backend/python/parler-tts/requirements-cublas11.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt index 6c3c7e7a..0fa27074 100644 --- a/backend/python/parler-tts/requirements-cublas12.txt +++ b/backend/python/parler-tts/requirements-cublas12.txt @@ -1,2 +1,4 @@ torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt index 7bfc411b..af9e820e 100644 --- a/backend/python/parler-tts/requirements-hipblas.txt +++ b/backend/python/parler-tts/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 58a2a1dd..002a55c3 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index 1dfa6675..297ddd0b 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,6 +1,4 @@ -accelerate grpcio==1.65.1 protobuf git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/petals/requirements-cpu.txt b/backend/python/petals/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ b/backend/python/petals/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/petals/requirements-cublas11.txt b/backend/python/petals/requirements-cublas11.txt index 6461b696..f7683016 100644 --- a/backend/python/petals/requirements-cublas11.txt +++ b/backend/python/petals/requirements-cublas11.txt @@ -1,2 +1,3 @@ --extra-index-url 
https://download.pytorch.org/whl/cu118 torch +transformers diff --git a/backend/python/petals/requirements-cublas12.txt b/backend/python/petals/requirements-cublas12.txt index 12c6d5d5..4f492ddc 100644 --- a/backend/python/petals/requirements-cublas12.txt +++ b/backend/python/petals/requirements-cublas12.txt @@ -1 +1,2 @@ torch +transformers diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt index 0331f106..8a4e2ff0 100644 --- a/backend/python/petals/requirements-hipblas.txt +++ b/backend/python/petals/requirements-hipblas.txt @@ -1,2 +1,3 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch +transformers diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt index 755e19d8..4e3ed017 100644 --- a/backend/python/petals/requirements-intel.txt +++ b/backend/python/petals/requirements-intel.txt @@ -2,4 +2,5 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers \ No newline at end of file diff --git a/backend/python/petals/requirements.txt b/backend/python/petals/requirements.txt index 10f5114e..0755fe01 100644 --- a/backend/python/petals/requirements.txt +++ b/backend/python/petals/requirements.txt @@ -1,3 +1,2 @@ git+https://github.com/bigscience-workshop/petals -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt new file mode 100644 index 00000000..25a1d8ab --- /dev/null +++ b/backend/python/rerankers/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt index 6461b696..06c4b2cf 100644 --- a/backend/python/rerankers/requirements-cublas11.txt +++ b/backend/python/rerankers/requirements-cublas11.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt index 12c6d5d5..25a1d8ab 100644 --- a/backend/python/rerankers/requirements-cublas12.txt +++ b/backend/python/rerankers/requirements-cublas12.txt @@ -1 +1,4 @@ +transformers +accelerate torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 76018445..961d150c 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index 755e19d8..1a39cf4f 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,5 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch 
+rerankers[transformers] optimum[openvino] setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 33166382..2a8d18b1 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -rerankers[transformers] grpcio==1.65.4 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt new file mode 100644 index 00000000..cd9924ef --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cpu.txt @@ -0,0 +1,6 @@ +torch +accelerate +transformers +bitsandbytes +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt index 6461b696..1131f066 100644 --- a/backend/python/sentencetransformers/requirements-cublas11.txt +++ b/backend/python/sentencetransformers/requirements-cublas11.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt index 12c6d5d5..2936e17b 100644 --- a/backend/python/sentencetransformers/requirements-cublas12.txt +++ b/backend/python/sentencetransformers/requirements-cublas12.txt @@ -1 +1,4 @@ torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt index 76018445..3b187c68 100644 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ b/backend/python/sentencetransformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 95d4848c..806e3d47 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt index 4ef4a28b..22ae46ad 100644 --- a/backend/python/sentencetransformers/requirements.txt +++ b/backend/python/sentencetransformers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -sentence-transformers==3.0.1 -transformers grpcio==1.65.1 protobuf certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt new file mode 100644 index 00000000..bbcdc8cd --- /dev/null +++ 
b/backend/python/transformers-musicgen/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt index 6461b696..191a6eef 100644 --- a/backend/python/transformers-musicgen/requirements-cublas11.txt +++ b/backend/python/transformers-musicgen/requirements-cublas11.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 -torch +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt index 12c6d5d5..bbcdc8cd 100644 --- a/backend/python/transformers-musicgen/requirements-cublas12.txt +++ b/backend/python/transformers-musicgen/requirements-cublas12.txt @@ -1 +1,3 @@ -torch +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt index 76018445..00f0a946 100644 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ b/backend/python/transformers-musicgen/requirements-hipblas.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 +transformers +accelerate torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 95d4848c..89bfa6a2 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -1,5 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch optimum[openvino] setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt index bec86241..420b968c 100644 --- a/backend/python/transformers-musicgen/requirements.txt +++ b/backend/python/transformers-musicgen/requirements.txt @@ -1,5 +1,3 @@ -accelerate -transformers grpcio==1.65.4 protobuf scipy==1.14.0 diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt new file mode 100644 index 00000000..f1e6281b --- /dev/null +++ b/backend/python/transformers/requirements-cpu.txt @@ -0,0 +1,4 @@ +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt index 6461b696..0abd72d9 100644 --- a/backend/python/transformers/requirements-cublas11.txt +++ b/backend/python/transformers/requirements-cublas11.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu118 torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt index 12c6d5d5..f1e6281b 100644 --- a/backend/python/transformers/requirements-cublas12.txt +++ b/backend/python/transformers/requirements-cublas12.txt @@ -1 +1,4 @@ torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt 
b/backend/python/transformers/requirements-hipblas.txt index 76018445..f6900af1 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 8fc18a0e..5d9efb71 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,3 +2,5 @@ intel-extension-for-pytorch torch optimum[openvino] +intel-extension-for-transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 2a08ba45..318560d9 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,8 +1,4 @@ -accelerate -transformers grpcio==1.65.4 protobuf certifi -intel-extension-for-transformers -bitsandbytes -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt new file mode 100644 index 00000000..3a3304c0 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cpu.txt @@ -0,0 +1,3 @@ +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt index 0de92979..4e0a151a 100644 --- a/backend/python/vall-e-x/requirements-cublas11.txt +++ b/backend/python/vall-e-x/requirements-cublas11.txt @@ -1,3 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 +accelerate torch torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt index 6c3c7e7a..3a3304c0 100644 --- a/backend/python/vall-e-x/requirements-cublas12.txt +++ b/backend/python/vall-e-x/requirements-cublas12.txt @@ -1,2 +1,3 @@ +accelerate torch torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt index 7bfc411b..6ddd0b8d 100644 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -1,3 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 +accelerate torch torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 58a2a1dd..6185314f 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -1,5 +1,6 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +accelerate torch torchaudio optimum[openvino] diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index ec3584b2..2a8d18b1 100644 --- a/backend/python/vall-e-x/requirements.txt +++ b/backend/python/vall-e-x/requirements.txt @@ -1,4 +1,3 @@ -accelerate grpcio==1.65.4 protobuf certifi \ No newline at end of file diff --git a/backend/python/vllm/requirements-after.txt b/backend/python/vllm/requirements-after.txt 
new file mode 100644
index 00000000..7bfe8efe
--- /dev/null
+++ b/backend/python/vllm/requirements-after.txt
@@ -0,0 +1 @@
+flash-attn
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt
new file mode 100644
index 00000000..cc5a50c6
--- /dev/null
+++ b/backend/python/vllm/requirements-cpu.txt
@@ -0,0 +1,4 @@
+accelerate
+torch
+transformers
+vllm
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt
index bed8cea8..48722834 100644
--- a/backend/python/vllm/requirements-cublas11.txt
+++ b/backend/python/vllm/requirements-cublas11.txt
@@ -1,3 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
+accelerate
 torch
-flash-attn
\ No newline at end of file
+transformers
+vllm
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt
index b6fef4d7..cc5a50c6 100644
--- a/backend/python/vllm/requirements-cublas12.txt
+++ b/backend/python/vllm/requirements-cublas12.txt
@@ -1,2 +1,4 @@
+accelerate
 torch
-flash-attn
\ No newline at end of file
+transformers
+vllm
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt
index 76018445..b11ba692 100644
--- a/backend/python/vllm/requirements-hipblas.txt
+++ b/backend/python/vllm/requirements-hipblas.txt
@@ -1,2 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch
\ No newline at end of file
+accelerate
+torch
+transformers
+vllm
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt
index 635b4c31..516e3d01 100644
--- a/backend/python/vllm/requirements-intel.txt
+++ b/backend/python/vllm/requirements-intel.txt
@@ -1,5 +1,8 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
+accelerate
 torch
+transformers
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+vllm
\ No newline at end of file
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index b8b79afb..99dc865e 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,7 +1,4 @@
-accelerate
-vllm
 grpcio==1.65.4
 protobuf
 certifi
-transformers
 setuptools
\ No newline at end of file

From 11b2adae0c166a29af4ea0f728cc4f9ed2233941 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 7 Aug 2024 18:08:26 +0200
Subject: [PATCH 120/235] fix(vllm): drop flash-attn installation afterwards

Signed-off-by: Ettore Di Giacinto
---
 backend/python/vllm/requirements-after.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 backend/python/vllm/requirements-after.txt

diff --git a/backend/python/vllm/requirements-after.txt b/backend/python/vllm/requirements-after.txt
deleted file mode 100644
index 7bfe8efe..00000000
--- a/backend/python/vllm/requirements-after.txt
+++ /dev/null
@@ -1 +0,0 @@
-flash-attn
\ No newline at end of file

From 66cf38b0b36f46d915a79b0e8d2ceae90614f6bb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 7 Aug 2024 19:45:14 +0200
Subject: [PATCH 121/235] feat(venv): shared env (#3195)

* feat(venv): allow sharing venvs

Signed-off-by: Ettore Di Giacinto

* fix(vllm): add back flash-attn

Signed-off-by: Ettore 
Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- backend/python/common/libbackend.sh | 40 ++++++++++++++----- .../vllm/requirements-cublas11-after.txt | 1 + .../vllm/requirements-cublas12-after.txt | 1 + 3 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 backend/python/vllm/requirements-cublas11-after.txt create mode 100644 backend/python/vllm/requirements-cublas12-after.txt diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 7287fb95..934b1fd3 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -18,10 +18,23 @@ # source $(dirname $0)/../common/libbackend.sh # function init() { + # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} + + # Path where all backends files are MY_DIR=$(realpath `dirname $0`) + + # Build type BUILD_PROFILE=$(getBuildProfile) + # Environment directory + EDIR=${MY_DIR} + + # Allow to specify a custom env dir for shared environments + if [ "x${ENV_DIR}" != "x" ]; then + EDIR=${ENV_DIR} + fi + # If a backend has defined a list of valid build profiles... if [ ! -z "${LIMIT_TARGETS}" ]; then isValidTarget=$(checkTargets ${LIMIT_TARGETS}) @@ -74,13 +87,14 @@ function getBuildProfile() { # This function is idempotent, so you can call it as many times as you want and it will # always result in an activated virtual environment function ensureVenv() { - if [ ! -d "${MY_DIR}/venv" ]; then - uv venv ${MY_DIR}/venv + if [ ! -d "${EDIR}/venv" ]; then + uv venv ${EDIR}/venv echo "virtualenv created" fi - - if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then - source ${MY_DIR}/venv/bin/activate + + # Source if we are not already in a Virtual env + if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then + source ${EDIR}/venv/bin/activate echo "virtualenv activated" fi @@ -113,21 +127,25 @@ function installRequirements() { # These are the requirements files we will attempt to install, in order declare -a requirementFiles=( - "${MY_DIR}/requirements-install.txt" - "${MY_DIR}/requirements.txt" - "${MY_DIR}/requirements-${BUILD_TYPE}.txt" + "${EDIR}/requirements-install.txt" + "${EDIR}/requirements.txt" + "${EDIR}/requirements-${BUILD_TYPE}.txt" ) if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then - requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt") + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt") fi # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements if [ "x${BUILD_TYPE}" == "x" ]; then - requirementFiles+=("${MY_DIR}/requirements-cpu.txt") + requirementFiles+=("${EDIR}/requirements-cpu.txt") fi - requirementFiles+=("${MY_DIR}/requirements-after.txt") + requirementFiles+=("${EDIR}/requirements-after.txt") + + if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt") + fi for reqFile in ${requirementFiles[@]}; do if [ -f ${reqFile} ]; then diff --git a/backend/python/vllm/requirements-cublas11-after.txt b/backend/python/vllm/requirements-cublas11-after.txt new file mode 100644 index 00000000..7bfe8efe --- /dev/null +++ b/backend/python/vllm/requirements-cublas11-after.txt @@ -0,0 +1 @@ +flash-attn \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12-after.txt b/backend/python/vllm/requirements-cublas12-after.txt new file mode 100644 index 00000000..7bfe8efe --- /dev/null +++ b/backend/python/vllm/requirements-cublas12-after.txt @@ -0,0 +1 @@ +flash-attn \ No newline at end of file From 
e198347886199a8119140f0d7d1a6442b4541ebc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Aug 2024 21:27:02 +0200 Subject: [PATCH 122/235] feat(openai): add `json_schema` format type and strict mode (#3193) * feat(openai): add json_schema and strict mode Signed-off-by: Ettore Di Giacinto * handle err vs _ security scanners prefer if we put these branches in, and I tend to agree. Signed-off-by: Dave --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: Dave Co-authored-by: Dave --- core/http/endpoints/openai/chat.go | 37 +++++++++++++++++++++++++++--- core/schema/openai.go | 11 +++++++++ pkg/functions/functions.go | 1 + 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 86b75601..12a14eac 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -172,6 +172,14 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup funcs := input.Functions shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() + strictMode := false + + for _, f := range input.Functions { + if f.Strict { + strictMode = true + break + } + } // Allow the user to set custom actions via config file // to be "embedded" in each model @@ -187,10 +195,33 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if config.ResponseFormatMap != nil { d := schema.ChatCompletionResponseFormat{} - dat, _ := json.Marshal(config.ResponseFormatMap) - _ = json.Unmarshal(dat, &d) + dat, err := json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } if d.Type == "json_object" { input.Grammar = functions.JSONBNF + } else if d.Type == "json_schema" { + d := schema.JsonSchemaRequest{} + dat, err := json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } + fs := &functions.JSONFunctionStructure{ + AnyOf: []functions.Item{d.JsonSchema.Schema}, + } + g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...) 
+ if err == nil { + input.Grammar = g + } } } @@ -201,7 +232,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } switch { - case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn: + case (!config.FunctionsConfig.GrammarConfig.NoGrammar || strictMode) && shouldUseFn: noActionGrammar := functions.Function{ Name: noActionName, Description: noActionDescription, diff --git a/core/schema/openai.go b/core/schema/openai.go index 3b39eaf3..fe4745bf 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -139,6 +139,17 @@ type ChatCompletionResponseFormat struct { Type ChatCompletionResponseFormatType `json:"type,omitempty"` } +type JsonSchemaRequest struct { + Type string `json:"type"` + JsonSchema JsonSchema `json:"json_schema"` +} + +type JsonSchema struct { + Name string `json:"name"` + Strict bool `json:"strict"` + Schema functions.Item `json:"schema"` +} + type OpenAIRequest struct { PredictionOptions diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index 19012d53..1a7e1ff1 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -14,6 +14,7 @@ const ( type Function struct { Name string `json:"name"` Description string `json:"description"` + Strict bool `json:"strict"` Parameters map[string]interface{} `json:"parameters"` } type Functions []Function From 2c8623dbb40dbe748d0c361074a836a660b8a91b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Aug 2024 23:34:37 +0200 Subject: [PATCH 123/235] fix(python): move vllm to after deps, drop diffusers main deps Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/requirements.txt | 7 ------- backend/python/vllm/requirements-after.txt | 1 + backend/python/vllm/requirements-cpu.txt | 3 +-- backend/python/vllm/requirements-cublas11.txt | 3 +-- backend/python/vllm/requirements-cublas12.txt | 3 +-- backend/python/vllm/requirements-hipblas.txt | 3 +-- backend/python/vllm/requirements-intel.txt | 3 +-- 7 files changed, 6 insertions(+), 17 deletions(-) create mode 100644 backend/python/vllm/requirements-after.txt diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 9919b20a..b4195fc5 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,12 +1,5 @@ setuptools -accelerate -compel -peft -diffusers grpcio==1.65.4 -opencv-python pillow protobuf -sentencepiece -transformers certifi diff --git a/backend/python/vllm/requirements-after.txt b/backend/python/vllm/requirements-after.txt new file mode 100644 index 00000000..76f11f15 --- /dev/null +++ b/backend/python/vllm/requirements-after.txt @@ -0,0 +1 @@ +vllm \ No newline at end of file diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt index cc5a50c6..765a1ef5 100644 --- a/backend/python/vllm/requirements-cpu.txt +++ b/backend/python/vllm/requirements-cpu.txt @@ -1,4 +1,3 @@ accelerate torch -transformers -vllm \ No newline at end of file +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt index 48722834..43817727 100644 --- a/backend/python/vllm/requirements-cublas11.txt +++ b/backend/python/vllm/requirements-cublas11.txt @@ -1,5 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cu118 accelerate torch -transformers -vllm \ No newline at end of file +transformers \ No newline at end of file diff --git 
a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt
index cc5a50c6..765a1ef5 100644
--- a/backend/python/vllm/requirements-cublas12.txt
+++ b/backend/python/vllm/requirements-cublas12.txt
@@ -1,4 +1,3 @@
 accelerate
 torch
-transformers
-vllm
\ No newline at end of file
+transformers
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt
index b11ba692..c73d8141 100644
--- a/backend/python/vllm/requirements-hipblas.txt
+++ b/backend/python/vllm/requirements-hipblas.txt
@@ -1,5 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/rocm6.0
 accelerate
 torch
-transformers
-vllm
\ No newline at end of file
+transformers
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt
index 516e3d01..7903282e 100644
--- a/backend/python/vllm/requirements-intel.txt
+++ b/backend/python/vllm/requirements-intel.txt
@@ -4,5 +4,4 @@ accelerate
 torch
 transformers
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
-vllm
\ No newline at end of file
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file

From 36e185ba6352686f433f3ac8a288b97eb4ae4c16 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 7 Aug 2024 23:35:44 +0200
Subject: [PATCH 124/235] feat(p2p): allow to run multiple clusters in the
 same p2p network (#3128)

feat(p2p): allow running multiple clusters in the same network

Allow specifying a network ID via the CLI, which makes it possible to run
multiple clusters, logically separated, within the same network (using the
same shared token).

Note: this segregation is not "secure" by any means; anyone holding the
network token can see the services available across the whole network.
However, it provides a way to separate the inference endpoints.

This allows, for instance, having a node that is both federated and has a
set of llama.cpp workers attached.
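As an illustrative sketch, not part of the committed change: the commands
below assume the binary is invoked as `local-ai` and that the worker
subcommand is `p2p-llama-cpp-rpc`; the environment variables are the ones
introduced by this patch.

    # Two logically separated clusters sharing one p2p network.
    # The network ID only namespaces service discovery: the shared token
    # still exposes the whole network, so it is not a security boundary.
    export LOCALAI_P2P_TOKEN="<shared-network-token>"   # placeholder

    # Cluster "a": an API node plus a llama.cpp worker attached to it
    LOCALAI_P2P_NETWORK_ID=cluster-a local-ai run --p2p
    LOCALAI_P2P_NETWORK_ID=cluster-a local-ai worker p2p-llama-cpp-rpc

    # Cluster "b": an independent set of instances on the same network
    LOCALAI_P2P_NETWORK_ID=cluster-b local-ai run --p2p

Each instance then discovers only the services carrying its own network
prefix, since the ID is joined to the service name as `<network>_<service>`
by the new `NetworkID` helper.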
Signed-off-by: Ettore Di Giacinto --- core/cli/federated.go | 9 +++++---- core/cli/run.go | 11 +++++++---- core/cli/worker/worker_p2p.go | 17 +++++++++-------- core/config/application_config.go | 7 +++++++ core/http/endpoints/localai/p2p.go | 12 +++++++----- core/http/routes/localai.go | 2 +- core/http/routes/ui.go | 9 +++++---- core/p2p/federated.go | 9 +++++++++ 8 files changed, 50 insertions(+), 26 deletions(-) diff --git a/core/cli/federated.go b/core/cli/federated.go index 32f0fa87..271babca 100644 --- a/core/cli/federated.go +++ b/core/cli/federated.go @@ -8,14 +8,15 @@ import ( ) type FederatedCLI struct { - Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` - Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` - LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"` + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances." group:"p2p"` } func (f *FederatedCLI) Run(ctx *cliContext.Context) error { - fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced) + fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, f.LoadBalanced) return fs.Start(context.Background()) } diff --git a/core/cli/run.go b/core/cli/run.go index b3d91632..9d58f6d9 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -54,6 +54,7 @@ type RunCMD struct { OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." 
group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` @@ -94,6 +95,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithOpaqueErrors(r.OpaqueErrors), config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), + config.WithP2PNetworkID(r.Peer2PeerNetworkID), } token := "" @@ -119,9 +121,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } log.Info().Msg("Starting P2P server discovery...") - if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) { + if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, ""), func(serviceID string, node p2p.NodeData) { var tunnelAddresses []string - for _, v := range p2p.GetAvailableNodes("") { + for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, "")) { if v.IsOnline() { tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) } else { @@ -142,14 +144,15 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { if err != nil { return err } - if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil { + if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID)); err != nil { return err } node, err := p2p.NewNode(token) if err != nil { return err } - if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil { + + if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil); err != nil { return err } } diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 2eb5cb94..ddb3518c 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -19,12 +19,13 @@ import ( ) type P2P struct { - WorkerFlags `embed:""` - Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` - NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` - RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` - RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` - ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + WorkerFlags `embed:""` + Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to 
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 2eb5cb94..ddb3518c 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -19,12 +19,13 @@ import ( ) type P2P struct { - WorkerFlags `embed:""` - Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` - NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` - RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` - RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` - ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + WorkerFlags `embed:""` + Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` + NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` + RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` + RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` + ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarily by the user for grouping a set of instances" group:"p2p"` } func (r *P2P) Run(ctx *cliContext.Context) error { @@ -59,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { p = r.RunnerPort } - err = p2p.ExposeService(context.Background(), address, p, r.Token, "") + err = p2p.ExposeService(context.Background(), address, p, r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, "")) if err != nil { return err } @@ -99,7 +100,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { } }() - err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "") + err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, "")) if err != nil { return err } diff --git a/core/config/application_config.go b/core/config/application_config.go index 7233d1ac..6e8c46e1 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -34,6 +34,7 @@ type ApplicationConfig struct { EnforcePredownloadScans bool OpaqueErrors bool P2PToken string + P2PNetworkID string ModelLibraryURL string @@ -91,6 +92,12 @@ func WithCors(b bool) AppOption { } } +func WithP2PNetworkID(s string) AppOption { + return func(o *ApplicationConfig) { + o.P2PNetworkID = s + } +} + func WithCsrf(b bool) AppOption { return func(o *ApplicationConfig) { o.CSRF = b diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index cab0bb5d..93e9b5d5 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -11,12 +11,14 @@ import ( // @Summary Returns available P2P nodes // @Success 200 {object} []schema.P2PNodesResponse "Response" // @Router /api/p2p [get] -func ShowP2PNodes(c *fiber.Ctx) error { +func ShowP2PNodes(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error { // Render index - return c.JSON(schema.P2PNodesResponse{ - Nodes: p2p.GetAvailableNodes(""), - FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID), - }) + return func(c *fiber.Ctx) error { + return c.JSON(schema.P2PNodesResponse{ + Nodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")), + FederatedNodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)), + }) + } } // ShowP2PToken returns the P2P token diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index b8a811b5..9c420010 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -59,7 +59,7 @@ func RegisterLocalAIRoutes(app *fiber.App, // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", auth, localai.ShowP2PNodes) + app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig)) app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig)) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 92917463..4f8afd3c 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -96,6 +96,7 @@ func RegisterUIRoutes(app *fiber.App, //"FederatedNodes": 
p2p.GetAvailableNodes(p2p.FederatedID), "IsP2PEnabled": p2p.IsP2PEnabled(), "P2PToken": appConfig.P2PToken, + "NetworkID": appConfig.P2PNetworkID, } // Render index @@ -104,17 +105,17 @@ func RegisterUIRoutes(app *fiber.App, /* show nodes live! */ app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")))) }) app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")))) }) app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) } diff --git a/core/p2p/federated.go b/core/p2p/federated.go index b56c9e0c..3ac3ff91 100644 --- a/core/p2p/federated.go +++ b/core/p2p/federated.go @@ -1,7 +1,16 @@ package p2p +import "fmt" + const FederatedID = "federated" +func NetworkID(networkID, serviceID string) string { + if networkID != "" { + return fmt.Sprintf("%s_%s", networkID, serviceID) + } + return serviceID +} + type FederatedServer struct { listenAddr, service, p2ptoken string requestTable map[string]int From 8814b31805b8b77a467fcaf4ce25aa37f36f59dc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 7 Aug 2024 23:35:55 +0200 Subject: [PATCH 125/235] chore: drop gpt4all.cpp (#3106) chore: drop gpt4all gpt4all is already supported in llama.cpp - the backend was kept to maintain compatibility with old gpt4all models (prior to the gguf format). It is now a good time to clean up and remove it to slim down the compilation process. 
Signed-off-by: Ettore Di Giacinto --- Makefile | 42 +------------------- backend/go/llm/gpt4all/gpt4all.go | 62 ------------------------------ backend/go/llm/gpt4all/main.go | 21 ---------- core/cli/worker/worker_llamacpp.go | 2 +- core/cli/worker/worker_p2p.go | 2 +- core/http/app_test.go | 40 ------------------- core/http/routes/ui.go | 2 +- core/startup/startup.go | 2 +- pkg/model/initializers.go | 11 +----- 9 files changed, 7 insertions(+), 177 deletions(-) delete mode 100644 backend/go/llm/gpt4all/gpt4all.go delete mode 100644 backend/go/llm/gpt4all/main.go diff --git a/Makefile b/Makefile index 476caac6..bcbdbe83 100644 --- a/Makefile +++ b/Makefile @@ -10,10 +10,6 @@ GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be CPPLLAMA_VERSION?=1e6f6554aa11fa10160a5fda689e736c3c34169f -# gpt4all version -GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all -GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8 - # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 @@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server -ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store @@ -253,18 +248,6 @@ sources/go-piper: sources/go-piper/libpiper_binding.a: sources/go-piper $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o -## GPT4ALL -sources/gpt4all: - mkdir -p sources/gpt4all - cd sources/gpt4all && \ - git init && \ - git remote add origin $(GPT4ALL_REPO) && \ - git fetch origin && \ - git checkout $(GPT4ALL_VERSION) && \ - git submodule update --init --recursive --depth 1 --single-branch - -sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all - $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a ## RWKV sources/go-rwkv.cpp: @@ -318,7 +301,7 @@ sources/whisper.cpp: sources/whisper.cpp/libwhisper.a: sources/whisper.cpp cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a -get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp +get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp replace: $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp @@ -328,7 +311,6 @@ replace: $(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream $(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper $(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion - $(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang $(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp dropreplace: @@ -339,7 +321,6 @@ dropreplace: $(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream $(GOCMD) mod edit -dropreplace github.com/mudler/go-piper $(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion 
- $(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang $(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp prepare-sources: get-sources replace @@ -349,7 +330,6 @@ prepare-sources: get-sources replace rebuild: ## Rebuilds the project $(GOCMD) clean -cache $(MAKE) -C sources/go-llama.cpp clean - $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean $(MAKE) -C sources/go-rwkv.cpp clean $(MAKE) -C sources/whisper.cpp clean $(MAKE) -C sources/go-stable-diffusion clean @@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs export GO_TAGS="tts stablediffusion debug" $(MAKE) prepare-test HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) - $(MAKE) test-gpt4all + $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS) $(MAKE) test-llama $(MAKE) test-llama-gguf $(MAKE) test-tts @@ -500,10 +479,6 @@ teardown-e2e: rm -rf $(TEST_DIR) || true docker stop $$(docker ps -q --filter ancestor=localai-tests) -test-gpt4all: prepare-test - TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ - $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS) - test-llama: prepare-test TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \ $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS) @@ -730,12 +705,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin mkdir -p backend-assets/espeak-ng-data @cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. 
backend-assets/espeak-ng-data -backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a - mkdir -p backend-assets/gpt4all - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true - @cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true - backend-assets/grpc: protogen-go replace mkdir -p backend-assets/grpc @@ -746,13 +715,6 @@ ifneq ($(UPX),) $(UPX) backend-assets/grpc/bert-embeddings endif -backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc - CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \ - $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/ -ifneq ($(UPX),) - $(UPX) backend-assets/grpc/gpt4all -endif - backend-assets/grpc/huggingface: backend-assets/grpc $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/ ifneq ($(UPX),) diff --git a/backend/go/llm/gpt4all/gpt4all.go b/backend/go/llm/gpt4all/gpt4all.go deleted file mode 100644 index 9caab48c..00000000 --- a/backend/go/llm/gpt4all/gpt4all.go +++ /dev/null @@ -1,62 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" -) - -type LLM struct { - base.SingleThread - - gpt4all *gpt4all.Model -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - model, err := gpt4all.New(opts.ModelFile, - gpt4all.SetThreads(int(opts.Threads)), - gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath)) - llm.gpt4all = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption { - predictOptions := []gpt4all.PredictOption{ - gpt4all.SetTemperature(float64(opts.Temperature)), - gpt4all.SetTopP(float64(opts.TopP)), - gpt4all.SetTopK(int(opts.TopK)), - gpt4all.SetTokens(int(opts.Tokens)), - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch))) - } - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - predictOptions := buildPredictOptions(opts) - - go func() { - llm.gpt4all.SetTokenCallback(func(token string) bool { - results <- token - return true - }) - _, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...) 
- if err != nil { - fmt.Println("err: ", err) - } - llm.gpt4all.SetTokenCallback(nil) - close(results) - }() - - return nil -} diff --git a/backend/go/llm/gpt4all/main.go b/backend/go/llm/gpt4all/main.go deleted file mode 100644 index acf44087..00000000 --- a/backend/go/llm/gpt4all/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go index 5598a485..2baf51ec 100644 --- a/core/cli/worker/worker_llamacpp.go +++ b/core/cli/worker/worker_llamacpp.go @@ -21,7 +21,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } if len(os.Args) < 4 { diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index ddb3518c..93a365cb 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -33,7 +33,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } // Check if the token is set diff --git a/core/http/app_test.go b/core/http/app_test.go index b21ad25a..a837e20c 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -563,32 +563,6 @@ var _ = Describe("API test", func() { Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) }) - - It("runs gpt4all", Label("gpt4all"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", - Name: "gpt4all-j", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "960s", "10s").Should(Equal(true)) - - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) - }) - }) }) @@ -792,20 +766,6 @@ var _ = Describe("API test", func() { 
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) - It("can generate completions from model configs", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Text).ToNot(BeEmpty()) - }) - - It("can generate chat completions from model configs", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) - }) - It("returns errors", func() { _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt}) Expect(err).To(HaveOccurred()) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 4f8afd3c..2996e9dc 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -267,7 +267,7 @@ func RegisterUIRoutes(app *fiber.App, return c.SendString(elements.ProgressBar("100")) } if status.Error != nil { - // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable + // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user processingModels.DeleteUUID(jobUID) return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) } diff --git a/core/startup/startup.go b/core/startup/startup.go index 55f930a4..3565d196 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -106,7 +106,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } } diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 88a08f28..11980f03 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -45,11 +45,6 @@ const ( LLamaCPPGRPC = "llama-cpp-grpc" - Gpt4AllLlamaBackend = "gpt4all-llama" - Gpt4AllMptBackend = "gpt4all-mpt" - Gpt4AllJBackend = "gpt4all-j" - Gpt4All = "gpt4all" - BertEmbeddingsBackend = "bert-embeddings" RwkvBackend = "rwkv" WhisperBackend = "whisper" @@ -144,11 +139,10 @@ ENTRY: // sets a priority list - first has more priority priorityList := []string{ - // First llama.cpp(variants) and llama-ggml to follow. // We keep the fallback to prevent that if the llama.cpp variants // that depends on shared libs if breaks have still a safety net. 
- LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback, + LLamaCPP, LlamaGGML, LLamaCPPFallback, } toTheEnd := []string{ @@ -434,9 +428,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e var backendToConsume string switch backend { - case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All: - o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all") - backendToConsume = Gpt4All case PiperBackend: o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data") backendToConsume = PiperBackend From 1d94aaa10f5955f8e0299170ef8cc9e02e5d811a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:54:27 +0200 Subject: [PATCH 126/235] feat(swagger): update swagger (#3196) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 3 +++ swagger/swagger.json | 3 +++ swagger/swagger.yaml | 2 ++ 3 files changed, 8 insertions(+) diff --git a/swagger/docs.go b/swagger/docs.go index 9a5a1784..4d89a926 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -712,6 +712,9 @@ const docTemplate = `{ "parameters": { "type": "object", "additionalProperties": true + }, + "strict": { + "type": "boolean" } } }, diff --git a/swagger/swagger.json b/swagger/swagger.json index 9d53fbbe..ef038c4c 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -705,6 +705,9 @@ "parameters": { "type": "object", "additionalProperties": true + }, + "strict": { + "type": "boolean" } } }, diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 2d628566..34d3d64f 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -16,6 +16,8 @@ definitions: parameters: additionalProperties: true type: object + strict: + type: boolean type: object functions.Item: properties: From 1c708d21de87371bb17c27e2615aa352e9ac5790 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:19:20 +0200 Subject: [PATCH 127/235] chore: :arrow_up: Update ggerganov/llama.cpp to `15fa07a5c564d3ed7e7eb64b73272cedb27e73ec` (#3197) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bcbdbe83..6799cf2b 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=1e6f6554aa11fa10160a5fda689e736c3c34169f +CPPLLAMA_VERSION?=15fa07a5c564d3ed7e7eb64b73272cedb27e73ec # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 From 60117ec05722c41033376b67976f33dd7d8c34b3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Aug 2024 06:59:10 +0200 Subject: [PATCH 128/235] fix(apple): disable BUILD_TYPE metal on fallback (#3199) When compiling the single-binary on Apple, we enforce BUILD_TYPE=metal; however, we still want to keep the vanilla fallback, so that if llama.cpp fails to load Metal (e.g. if the Accelerate framework is missing, or the macOS version is too old) we can still run by offloading to the CPU. The default backend still uses Metal as usual. 
Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6799cf2b..22c48110 100644 --- a/Makefile +++ b/Makefile @@ -387,7 +387,7 @@ ifeq ($(DETECT_LIBS),true) scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2 endif ifeq ($(OS),Darwin) - $(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET}) + BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback else $(MAKE) backend-assets/grpc/llama-cpp-cuda $(MAKE) backend-assets/grpc/llama-cpp-hipblas From f7ffa9cd58c588dcf6ba56c2dd0ab5470efa2fae Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Aug 2024 11:59:31 +0200 Subject: [PATCH 129/235] fix(vall-e-x): pin hipblas deps (#3201) Signed-off-by: Ettore Di Giacinto --- backend/python/vall-e-x/requirements-hipblas.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt index 6ddd0b8d..fc43790a 100644 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 accelerate -torch -torchaudio \ No newline at end of file +torch==2.3.0+rocm6.0 +torchaudio==2.3.0+rocm6.0 \ No newline at end of file From 4a1a3a56ba9d6fc695b124a9de623fe7431cd224 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Aug 2024 11:59:42 +0200 Subject: [PATCH 130/235] models(gallery): add calme-2.3-legalkit-8b (#3200) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 65516cc3..3119fae0 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -307,6 +307,26 @@ - filename: L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf sha256: 585efc83e7f6893043be2487fc09c914a381fb463ce97942ef2f25ae85103bcd uri: huggingface://mradermacher/L3.1-70b-glitz-v0.2-i1-GGUF/L3.1-70b-glitz-v0.2.i1-Q4_K_M.gguf +- !!merge <<: *llama31 + name: "calme-2.3-legalkit-8b-i1" + icon: https://huggingface.co/MaziyarPanahi/calme-2.3-legalkit-8b/resolve/main/calme-2-legalkit.webp + urls: + - https://huggingface.co/mradermacher/calme-2.3-legalkit-8b-i1-GGUF + - https://huggingface.co/MaziyarPanahi/calme-2.3-legalkit-8b + description: | + This model is an advanced iteration of the powerful meta-llama/Meta-Llama-3.1-8B-Instruct, specifically fine-tuned to enhance its capabilities in the legal domain. The fine-tuning process utilized a synthetically generated dataset derived from the French LegalKit, a comprehensive legal language resource. + + To create this specialized dataset, I used the NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO model in conjunction with Hugging Face's Inference Endpoint. This approach allowed for the generation of high-quality, synthetic data that incorporates Chain of Thought (CoT) and advanced reasoning in its responses. + + The resulting model combines the robust foundation of Llama-3.1-8B with tailored legal knowledge and enhanced reasoning capabilities. This makes it particularly well-suited for tasks requiring in-depth legal analysis, interpretation, and application of French legal concepts. 
+ overrides: + parameters: + model: calme-2.3-legalkit-8b.i1-Q4_K_M.gguf + files: + - filename: calme-2.3-legalkit-8b.i1-Q4_K_M.gguf + sha256: b71dfea8bbd73b0fbd5793ef462b8540c24e1c52a47b1794561adb88109a9e80 + uri: huggingface://mradermacher/calme-2.3-legalkit-8b-i1-GGUF/calme-2.3-legalkit-8b.i1-Q4_K_M.gguf +## Uncensored models - !!merge <<: *llama31 name: "humanish-roleplay-llama-3.1-8b-i1" icon: https://cdn-uploads.huggingface.co/production/uploads/5fad8602b8423e1d80b8a965/VPwtjS3BtjEEEq7ck4kAQ.webp @@ -324,7 +344,6 @@ - filename: Humanish-Roleplay-Llama-3.1-8B.i1-Q4_K_M.gguf sha256: 18cf753684e5226b51f3defc708852ca4924f50dc8bc31c9a7d0a036a477b7a7 uri: huggingface://mradermacher/Humanish-Roleplay-Llama-3.1-8B-i1-GGUF/Humanish-Roleplay-Llama-3.1-8B.i1-Q4_K_M.gguf -## Uncensored models - !!merge <<: *llama31 name: "darkidol-llama-3.1-8b-instruct-1.0-uncensored-i1" icon: https://huggingface.co/aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored/resolve/main/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.png From 8317839ca5e36fe6abcb981ce26f0ea6428fa7d2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Aug 2024 17:28:07 +0200 Subject: [PATCH 131/235] fix(diffusers): use nightly rocm for hipblas builds (#3202) Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/requirements-hipblas.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 9e992d02..92987e7a 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,4 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--pre +--extra-index-url https://download.pytorch.org/whl/nightly/ torch torchvision diffusers @@ -7,4 +8,4 @@ transformers accelerate compel peft -sentencepiece \ No newline at end of file +sentencepiece From a507c13f8e58ccaebca4755d9fc7324a1c8b6bcb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 8 Aug 2024 22:21:05 +0200 Subject: [PATCH 132/235] fix(diffusers): do not specify `--pre` as with pip drop --pre as it is not supported by `uv` Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/requirements-hipblas.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 92987e7a..b7890f6e 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,4 +1,3 @@ ---pre --extra-index-url https://download.pytorch.org/whl/nightly/ torch torchvision From b1773e33d55b8748af5e553a6c8b7818a8e08bfe Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 9 Aug 2024 00:18:00 +0200 Subject: [PATCH 133/235] chore: :arrow_up: Update ggerganov/whisper.cpp to `6eac06759b87b50132a01be019e9250a3ffc8969` (#3203) :arrow_up: Update ggerganov/whisper.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22c48110..a6ee126f 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp -WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4 
+WHISPER_CPP_VERSION?=6eac06759b87b50132a01be019e9250a3ffc8969 # bert.cpp version BERT_REPO?=https://github.com/go-skynet/go-bert.cpp From 74f8785047e2a2f253e3cd34d92c7855601a7e90 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 9 Aug 2024 00:36:08 +0200 Subject: [PATCH 134/235] chore: :arrow_up: Update ggerganov/llama.cpp to `3a14e00366399040a139c67dd5951177a8cb5695` (#3204) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a6ee126f..1ed68c08 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=15fa07a5c564d3ed7e7eb64b73272cedb27e73ec +CPPLLAMA_VERSION?=3a14e00366399040a139c67dd5951177a8cb5695 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 From 5fcafc3d1e530aed28fb298bf36583f0814884f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 9 Aug 2024 11:07:38 +0200 Subject: [PATCH 135/235] fix(diffusers): allow pre-releases for requirements Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/install.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh index 36443ef1..b0b46a86 100755 --- a/backend/python/diffusers/install.sh +++ b/backend/python/diffusers/install.sh @@ -11,4 +11,9 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" fi +# hipblas builds from nightly, which needs pre-releases to work +if [ "x${BUILD_PROFILE}" == "xhipblas" ]; then + EXTRA_PIP_INSTALL_FLAGS+=" --prerelease=allow" +fi + installRequirements From 9e3e892ac79b200586e3be5ebf04a619ae25b2a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 9 Aug 2024 20:12:01 +0200 Subject: [PATCH 136/235] feat(p2p): add network explorer and community pools (#3125) * WIP Signed-off-by: Ettore Di Giacinto * Fixups Signed-off-by: Ettore Di Giacinto * Wire up a simple explorer DB Signed-off-by: Ettore Di Giacinto * wip Signed-off-by: Ettore Di Giacinto * WIP Signed-off-by: Ettore Di Giacinto * refactor: group service IDs so they can be identified easily in the ledger table Signed-off-by: Ettore Di Giacinto * feat(discovery): the discovery service now gathers worker information correctly Signed-off-by: Ettore Di Giacinto * feat(explorer): display network token Signed-off-by: Ettore Di Giacinto * feat(explorer): display form to add new networks Signed-off-by: Ettore Di Giacinto * feat(explorer): stop from overwriting networks Signed-off-by: Ettore Di Giacinto * feat(explorer): display only networks with active workers Signed-off-by: Ettore Di Giacinto * feat(explorer): list only clusters in a network if it has online workers Signed-off-by: Ettore Di Giacinto * remove invalid and inactive networks: if networks have no workers, delete them from the database; do the same if they are invalid. 
Signed-off-by: Ettore Di Giacinto * ci: add workflow to deploy new explorer versions automatically Signed-off-by: Ettore Di Giacinto * build-api: build with p2p tag Signed-off-by: Ettore Di Giacinto * Allow to specify a connection timeout Signed-off-by: Ettore Di Giacinto * logging Signed-off-by: Ettore Di Giacinto * Better p2p defaults Signed-off-by: Ettore Di Giacinto * Set loglevel Signed-off-by: Ettore Di Giacinto * Fix dht enable Signed-off-by: Ettore Di Giacinto * Default to info for loglevel Signed-off-by: Ettore Di Giacinto * Add navbar Signed-off-by: Ettore Di Giacinto * Slightly improve rendering Signed-off-by: Ettore Di Giacinto * Allow to copy the token easily Signed-off-by: Ettore Di Giacinto * ci fixups Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/deploy-explorer.yaml | 64 ++++ Makefile | 2 +- core/cli/cli.go | 1 + core/cli/explorer.go | 35 ++ core/cli/run.go | 4 +- core/cli/worker/worker_p2p.go | 4 +- core/explorer/database.go | 106 ++++++ core/explorer/database_test.go | 92 +++++ core/explorer/discovery.go | 203 +++++++++++ core/explorer/explorer_suite_test.go | 13 + core/http/endpoints/explorer/dashboard.go | 105 ++++++ core/http/endpoints/localai/p2p.go | 2 +- core/http/explorer.go | 46 +++ core/http/routes/explorer.go | 13 + core/http/routes/ui.go | 4 +- core/http/views/explorer.html | 342 ++++++++++++++++++ core/http/views/partials/navbar_explorer.html | 39 ++ core/p2p/node.go | 5 +- core/p2p/p2p.go | 19 +- 19 files changed, 1082 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/deploy-explorer.yaml create mode 100644 core/cli/explorer.go create mode 100644 core/explorer/database.go create mode 100644 core/explorer/database_test.go create mode 100644 core/explorer/discovery.go create mode 100644 core/explorer/explorer_suite_test.go create mode 100644 core/http/endpoints/explorer/dashboard.go create mode 100644 core/http/explorer.go create mode 100644 core/http/routes/explorer.go create mode 100644 core/http/views/explorer.html create mode 100644 core/http/views/partials/navbar_explorer.html diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml new file mode 100644 index 00000000..71a14183 --- /dev/null +++ b/.github/workflows/deploy-explorer.yaml @@ -0,0 +1,64 @@ +name: Explorer deployment + +on: + push: + branches: + - master + tags: + - 'v*' + +concurrency: + group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }} + +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v5 + with: + go-version: '1.21.x' + cache: false + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + make protogen-go + - name: Build api + run: | + make build-api + - name: rm + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo rm -rf local-ai/ || true + - name: copy file via ssh + uses: appleboy/scp-action@v0.1.7 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + 
key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + source: "local-ai" + overwrite: true + rm: true + target: ./local-ai + - name: restarting + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo cp -rfv local-ai/local-ai /usr/bin/local-ai + sudo systemctl restart local-ai diff --git a/Makefile b/Makefile index 1ed68c08..d690e483 100644 --- a/Makefile +++ b/Makefile @@ -376,7 +376,7 @@ build-minimal: BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build build-api: - BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build + BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build backend-assets/lib: mkdir -p backend-assets/lib diff --git a/core/cli/cli.go b/core/cli/cli.go index 0fed33fd..2073778d 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -15,4 +15,5 @@ var CLI struct { Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"` Util UtilCMD `cmd:"" help:"Utility commands"` + Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"` } diff --git a/core/cli/explorer.go b/core/cli/explorer.go new file mode 100644 index 00000000..0fcde728 --- /dev/null +++ b/core/cli/explorer.go @@ -0,0 +1,35 @@ +package cli + +import ( + "context" + "time" + + cliContext "github.com/mudler/LocalAI/core/cli/context" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http" +) + +type ExplorerCMD struct { + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"` + ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"` +} + +func (e *ExplorerCMD) Run(ctx *cliContext.Context) error { + + db, err := explorer.NewDatabase(e.PoolDatabase) + if err != nil { + return err + } + + dur, err := time.ParseDuration(e.ConnectionTimeout) + if err != nil { + return err + } + ds := explorer.NewDiscoveryServer(db, dur) + + go ds.Start(context.Background()) + appHTTP := http.Explorer(db, ds) + + return appHTTP.Listen(e.Address) +} diff --git a/core/cli/run.go b/core/cli/run.go index 9d58f6d9..707f6afb 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -121,9 +121,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } log.Info().Msg("Starting P2P server discovery...") - if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, ""), func(serviceID string, node p2p.NodeData) { + if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) { var tunnelAddresses []string - for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, "")) { + for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) { if v.IsOnline() { tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) } else { diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 93a365cb..17b9ff08 100644 --- a/core/cli/worker/worker_p2p.go +++ 
b/core/cli/worker/worker_p2p.go @@ -60,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { p = r.RunnerPort } - err = p2p.ExposeService(context.Background(), address, p, r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, "")) + err = p2p.ExposeService(context.Background(), address, p, r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err != nil { return err } @@ -100,7 +100,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { } }() - err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, "")) + err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err != nil { return err } diff --git a/core/explorer/database.go b/core/explorer/database.go new file mode 100644 index 00000000..8535140c --- /dev/null +++ b/core/explorer/database.go @@ -0,0 +1,106 @@ +package explorer + +// A simple JSON database for storing and retrieving p2p network tokens and a name and description. + +import ( + "encoding/json" + "os" + "sort" + "sync" +) + +// Database is a simple JSON database for storing and retrieving p2p network tokens and a name and description. +type Database struct { + sync.RWMutex + path string + data map[string]TokenData +} + +// TokenData is a p2p network token with a name and description. +type TokenData struct { + Name string `json:"name"` + Description string `json:"description"` +} + +// NewDatabase creates a new Database with the given path. +func NewDatabase(path string) (*Database, error) { + db := &Database{ + data: make(map[string]TokenData), + path: path, + } + return db, db.load() +} + +// Get retrieves a Token from the Database by its token. +func (db *Database) Get(token string) (TokenData, bool) { + db.RLock() + defer db.RUnlock() + t, ok := db.data[token] + return t, ok +} + +// Set stores a Token in the Database by its token. +func (db *Database) Set(token string, t TokenData) error { + db.Lock() + db.data[token] = t + db.Unlock() + + return db.Save() +} + +// Delete removes a Token from the Database by its token. +func (db *Database) Delete(token string) error { + db.Lock() + delete(db.data, token) + db.Unlock() + return db.Save() +} + +func (db *Database) TokenList() []string { + db.RLock() + defer db.RUnlock() + tokens := []string{} + for k := range db.data { + tokens = append(tokens, k) + } + + sort.Slice(tokens, func(i, j int) bool { + // sort by token + return tokens[i] < tokens[j] + }) + + return tokens +} + +// load reads the Database from disk. +func (db *Database) load() error { + db.Lock() + defer db.Unlock() + + if _, err := os.Stat(db.path); os.IsNotExist(err) { + return nil + } + + // Read the file from disk + // Unmarshal the JSON into db.data + f, err := os.ReadFile(db.path) + if err != nil { + return err + } + return json.Unmarshal(f, &db.data) +} + +// Save writes the Database to disk. +func (db *Database) Save() error { + db.RLock() + defer db.RUnlock() + + // Marshal db.data into JSON + // Write the JSON to the file + f, err := os.Create(db.path) + if err != nil { + return err + } + defer f.Close() + return json.NewEncoder(f).Encode(db.data) +} diff --git a/core/explorer/database_test.go b/core/explorer/database_test.go new file mode 100644 index 00000000..7f2cbd26 --- /dev/null +++ b/core/explorer/database_test.go @@ -0,0 +1,92 @@ +package explorer_test + +import ( + "os" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/explorer" +) + +var _ = Describe("Database", func() { + var ( + dbPath string + db *explorer.Database + err error + ) + + BeforeEach(func() { + // Create a temporary file path for the database + dbPath = "test_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + }) + + AfterEach(func() { + // Clean up the temporary database file + os.Remove(dbPath) + }) + + Context("when managing tokens", func() { + It("should add and retrieve a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + }) + + It("should delete a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + err = db.Delete(token) + Expect(err).To(BeNil()) + + _, exists := db.Get(token) + Expect(exists).To(BeFalse()) + }) + + It("should persist data to disk", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + // Recreate the database object to simulate reloading from disk + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + + // Check the token list + tokenList := db.TokenList() + Expect(tokenList).To(ContainElement(token)) + }) + }) + + Context("when loading an empty or non-existent file", func() { + It("should start with an empty database", func() { + dbPath = "empty_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + _, exists := db.Get("nonexistent") + Expect(exists).To(BeFalse()) + + // Clean up + os.Remove(dbPath) + }) + }) +}) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go new file mode 100644 index 00000000..73281dc0 --- /dev/null +++ b/core/explorer/discovery.go @@ -0,0 +1,203 @@ +package explorer + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/edgevpn/pkg/blockchain" +) + +type DiscoveryServer struct { + sync.Mutex + database *Database + networkState *NetworkState + connectionTime time.Duration +} + +type NetworkState struct { + Networks map[string]Network +} + +func (s *DiscoveryServer) NetworkState() *NetworkState { + s.Lock() + defer s.Unlock() + return s.networkState +} + +// NewDiscoveryServer creates a new DiscoveryServer with the given Database. 
+// it keeps the db state in sync with the network state +func NewDiscoveryServer(db *Database, dur time.Duration) *DiscoveryServer { + if dur == 0 { + dur = 50 * time.Second + } + return &DiscoveryServer{ + database: db, + connectionTime: dur, + networkState: &NetworkState{ + Networks: map[string]Network{}, + }, + } +} + +type Network struct { + Clusters []ClusterData +} + +func (s *DiscoveryServer) runBackground() { + if len(s.database.TokenList()) == 0 { + time.Sleep(5 * time.Second) // avoid busy loop + return + } + + for _, token := range s.database.TokenList() { + c, cancel := context.WithTimeout(context.Background(), s.connectionTime) + defer cancel() + + // Connect to the network + // Get the number of nodes + // save it in the current state (mutex) + // do not do in parallel + n, err := p2p.NewNode(token) + if err != nil { + log.Err(err).Msg("Failed to create node") + s.database.Delete(token) + continue + } + + err = n.Start(c) + if err != nil { + log.Err(err).Msg("Failed to start node") + s.database.Delete(token) + continue + } + + ledger, err := n.Ledger() + if err != nil { + log.Err(err).Msg("Failed to start ledger") + s.database.Delete(token) + continue + } + + networkData := make(chan ClusterData) + + // get the network data - it takes the whole timeout + // as we might not be connected to the network yet, + // and few attempts would have to be made before bailing out + go s.retrieveNetworkData(c, ledger, networkData) + + hasWorkers := false + ledgerK := []ClusterData{} + for key := range networkData { + ledgerK = append(ledgerK, key) + if len(key.Workers) > 0 { + hasWorkers = true + } + } + + log.Debug().Any("network", token).Msgf("Network has %d clusters", len(ledgerK)) + if len(ledgerK) != 0 { + for _, k := range ledgerK { + log.Debug().Any("network", token).Msgf("Clusterdata %+v", k) + } + } + + if hasWorkers { + s.Lock() + s.networkState.Networks[token] = Network{ + Clusters: ledgerK, + } + s.Unlock() + } else { + log.Info().Any("network", token).Msg("No workers found in the network. Removing it from the database") + s.database.Delete(token) + } + } +} + +type ClusterData struct { + Workers []string + Type string + NetworkID string +} + +func (s *DiscoveryServer) retrieveNetworkData(c context.Context, ledger *blockchain.Ledger, networkData chan ClusterData) { + clusters := map[string]ClusterData{} + + defer func() { + for _, n := range clusters { + networkData <- n + } + close(networkData) + }() + + for { + select { + case <-c.Done(): + return + default: + time.Sleep(5 * time.Second) + + data := ledger.LastBlock().Storage + LEDGER: + for d := range data { + toScanForWorkers := false + cd := ClusterData{} + isWorkerCluster := d == p2p.WorkerID || (strings.Contains(d, "_") && strings.Contains(d, p2p.WorkerID)) + isFederatedCluster := d == p2p.FederatedID || (strings.Contains(d, "_") && strings.Contains(d, p2p.FederatedID)) + switch { + case isWorkerCluster: + toScanForWorkers = true + cd.Type = "worker" + case isFederatedCluster: + toScanForWorkers = true + cd.Type = "federated" + } + + if strings.Contains(d, "_") { + cd.NetworkID = strings.Split(d, "_")[0] + } + + if !toScanForWorkers { + continue LEDGER + } + + atLeastOneWorker := false + DATA: + for _, v := range data[d] { + nd := &p2p.NodeData{} + if err := v.Unmarshal(nd); err != nil { + continue DATA + } + + if nd.IsOnline() { + atLeastOneWorker = true + (&cd).Workers = append(cd.Workers, nd.ID) + } + } + + if atLeastOneWorker { + clusters[d] = cd + } + } + } + } +} + +// Start the discovery server. 
This is meant to be run in a goroutine. +func (s *DiscoveryServer) Start(ctx context.Context) error { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled") + default: + // Collect data + s.runBackground() + } + } +} diff --git a/core/explorer/explorer_suite_test.go b/core/explorer/explorer_suite_test.go new file mode 100644 index 00000000..fc718d5f --- /dev/null +++ b/core/explorer/explorer_suite_test.go @@ -0,0 +1,13 @@ +package explorer_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestExplorer(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Explorer test suite") +} diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go new file mode 100644 index 00000000..7cd9f3c9 --- /dev/null +++ b/core/http/endpoints/explorer/dashboard.go @@ -0,0 +1,105 @@ +package explorer + +import ( + "encoding/base64" + "sort" + + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/internal" +) + +func Dashboard() func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/explorer", summary) + } + } +} + +type AddNetworkRequest struct { + Token string `json:"token"` + Name string `json:"name"` + Description string `json:"description"` +} + +type Network struct { + explorer.Network + explorer.TokenData + Token string `json:"token"` +} + +func ShowNetworks(db *explorer.Database, ds *explorer.DiscoveryServer) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + networkState := ds.NetworkState() + results := []Network{} + for token, network := range networkState.Networks { + networkData, exists := db.Get(token) // get the token data + hasWorkers := false + for _, cluster := range network.Clusters { + if len(cluster.Workers) > 0 { + hasWorkers = true + break + } + } + if exists && hasWorkers { + results = append(results, Network{Network: network, TokenData: networkData, Token: token}) + } + } + + // order by number of clusters + sort.Slice(results, func(i, j int) bool { + return len(results[i].Clusters) > len(results[j].Clusters) + }) + + return c.JSON(results) + } +} + +func AddNetwork(db *explorer.Database) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AddNetworkRequest) + if err := c.BodyParser(request); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + if request.Token == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token is required"}) + } + + if request.Name == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Name is required"}) + } + + if request.Description == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Description is required"}) + } + + // TODO: check if token is valid, otherwise reject + // try to decode the token from base64 + _, err := base64.StdEncoding.DecodeString(request.Token) + if err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Invalid token"}) + } + + if _, exists := db.Get(request.Token); exists { + return 
c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token already exists"}) + } + err = db.Set(request.Token, explorer.TokenData{Name: request.Name, Description: request.Description}) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"error": "Cannot add token"}) + } + + return c.Status(fiber.StatusOK).JSON(fiber.Map{"message": "Token added"}) + } +} diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index 93e9b5d5..bbcee8c8 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -15,7 +15,7 @@ func ShowP2PNodes(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error { // Render index return func(c *fiber.Ctx) error { return c.JSON(schema.P2PNodesResponse{ - Nodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")), + Nodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)), FederatedNodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)), }) } diff --git a/core/http/explorer.go b/core/http/explorer.go new file mode 100644 index 00000000..608ecdb5 --- /dev/null +++ b/core/http/explorer.go @@ -0,0 +1,46 @@ +package http + +import ( + "net/http" + + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/favicon" + "github.com/gofiber/fiber/v2/middleware/filesystem" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/routes" +) + +func Explorer(db *explorer.Database, discoveryServer *explorer.DiscoveryServer) *fiber.App { + + fiberCfg := fiber.Config{ + Views: renderEngine(), + // We disable the Fiber startup message as it does not conform to structured logging. + // We register a startup log line with connection information in the OnListen hook to keep things user friendly though + DisableStartupMessage: false, + // Override default error handler + } + + app := fiber.New(fiberCfg) + + routes.RegisterExplorerRoutes(app, db, discoveryServer) + + httpFS := http.FS(embedDirStatic) + + app.Use(favicon.New(favicon.Config{ + URL: "/favicon.ico", + FileSystem: httpFS, + File: "static/favicon.ico", + })) + + app.Use("/static", filesystem.New(filesystem.Config{ + Root: httpFS, + PathPrefix: "static", + Browse: true, + })) + + // Define a custom 404 handler + // Note: keep this at the bottom! + app.Use(notFoundHandler) + + return app +} diff --git a/core/http/routes/explorer.go b/core/http/routes/explorer.go new file mode 100644 index 00000000..b3c0d40b --- /dev/null +++ b/core/http/routes/explorer.go @@ -0,0 +1,13 @@ +package routes + +import ( + "github.com/gofiber/fiber/v2" + coreExplorer "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/endpoints/explorer" +) + +func RegisterExplorerRoutes(app *fiber.App, db *coreExplorer.Database, ds *coreExplorer.DiscoveryServer) { + app.Get("/", explorer.Dashboard()) + app.Post("/network/add", explorer.AddNetwork(db)) + app.Get("/networks", explorer.ShowNetworks(db, ds)) +} diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 2996e9dc..0a9867fe 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -105,14 +105,14 @@ func RegisterUIRoutes(app *fiber.App, /* show nodes live! 
*/ app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error { return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, "")))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error { return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html new file mode 100644 index 00000000..91cb9720 --- /dev/null +++ b/core/http/views/explorer.html @@ -0,0 +1,342 @@ + + + +{{template "views/partials/head" .}} + + + + +
+ {{template "views/partials/navbar_explorer" .}} + +
+

Network Clusters Explorer

+

View the clusters and workers available in each network.

+
+ +
+ +
+
                    The explorer is a global, community-driven tool to share network tokens and view the clusters available across the globe.
                    Anyone can use the tokens to offload computation to the available clusters, or to share their own resources.
                    This is provided without any warranty. Use it at your own risk. We are not responsible for any potential harm or misuse. Sharing tokens globally allows anyone on the internet to use your instances.
                    Although the community will address bugs, this is experimental software and may be insecure to deploy on your hardware unless you take all necessary precautions.
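                    <!-- A "network token" here is assumed to be the base64-encoded edgevpn token that LocalAI P2P nodes print on startup:
                         the /network/add endpoint (AddNetwork in core/http/endpoints/explorer/dashboard.go) only checks that a submitted
                         token decodes as base64, and the discovery server later prunes tokens that repeatedly fail to connect. -->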
+
+ + +
+ +
+

Add New Network

+
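                <!-- Sketch of this form's backend contract, inferred from the handlers above: it should POST
                     {"token": ..., "name": ..., "description": ...} as JSON to /network/add; all three fields
                     are required server-side, and the token must decode as base64. -->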
+ + +
+
+ + +
+
+ + +
+ + + +
+ + + + + + + + +
+ + + {{template "views/partials/footer" .}} +
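    <!-- Usage sketch, assuming the explorer listens on the default :8080 bind address from core/cli/explorer.go
         and <base64-token> is a valid edgevpn network token:

           curl -X POST http://localhost:8080/network/add \
                -H "Content-Type: application/json" \
                -d '{"token":"<base64-token>","name":"my-network","description":"example"}'

           curl http://localhost:8080/networks

         Both endpoints are registered in core/http/routes/explorer.go. -->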
+ + + + diff --git a/core/http/views/partials/navbar_explorer.html b/core/http/views/partials/navbar_explorer.html new file mode 100644 index 00000000..ffc6c4d5 --- /dev/null +++ b/core/http/views/partials/navbar_explorer.html @@ -0,0 +1,39 @@ + + + diff --git a/core/p2p/node.go b/core/p2p/node.go index 6394498f..b89bb7c6 100644 --- a/core/p2p/node.go +++ b/core/p2p/node.go @@ -5,7 +5,10 @@ import ( "time" ) -const defaultServicesID = "services_localai" +const ( + defaultServicesID = "services" + WorkerID = "worker" +) type NodeData struct { Name string diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go index 927f0e24..37b892d9 100644 --- a/core/p2p/p2p.go +++ b/core/p2p/p2p.go @@ -345,13 +345,16 @@ func newNodeOpts(token string) ([]node.Option, error) { // TODO: move this up, expose more config options when creating a node noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true" - noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true" + noLimits := os.Getenv("LOCALAI_P2P_ENABLE_LIMITS") == "true" - loglevel := "info" + loglevel := os.Getenv("LOCALAI_P2P_LOGLEVEL") + if loglevel == "" { + loglevel = "info" + } c := config.Config{ Limit: config.ResourceLimit{ - Enable: !noLimits, + Enable: noLimits, MaxConns: 100, }, NetworkToken: token, @@ -366,19 +369,19 @@ func newNodeOpts(token string) ([]node.Option, error) { Service: true, Map: true, RateLimit: true, - RateLimitGlobal: 10, - RateLimitPeer: 10, + RateLimitGlobal: 100, + RateLimitPeer: 100, RateLimitInterval: defaultInterval, }, Discovery: config.Discovery{ - DHT: noDHT, + DHT: !noDHT, MDNS: true, - Interval: 30 * time.Second, + Interval: 10 * time.Second, }, Connection: config.Connection{ HolePunch: true, AutoRelay: true, - MaxConnections: 100, + MaxConnections: 1000, }, } From 6d20f38510937a0740bb1e0b7337dd617cbd7be8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 20:08:24 +0000 Subject: [PATCH 137/235] chore(deps): Bump aiohttp from 3.9.5 to 3.10.2 in /examples/langchain/langchainpy-localai-example in the pip group (#3207) chore(deps): Bump aiohttp Bumps the pip group in /examples/langchain/langchainpy-localai-example with 1 update: [aiohttp](https://github.com/aio-libs/aiohttp). Updates `aiohttp` from 3.9.5 to 3.10.2 - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.5...v3.10.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production dependency-group: pip ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 1d1b5023..414a1b27 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.5 +aiohttp==3.10.2 aiosignal==1.3.1 async-timeout==4.0.3 attrs==23.2.0 From 2e2a0dffbc4aae5ad1225278d98370a6ec898657 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 9 Aug 2024 22:36:10 +0200 Subject: [PATCH 138/235] fix(diffusers-hipblas): pin to rocm6.1 As per https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/3rd-party/pytorch-install.html Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/requirements-hipblas.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index b7890f6e..8c4c070c 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/ +--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.1/ torch torchvision diffusers From 71b823207659bdfd30105673c45d125aedda0f81 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:20:51 +0200 Subject: [PATCH 139/235] chore: :arrow_up: Update ggerganov/llama.cpp to `b72942fac998672a79a1ae3c03b340f7e629980b` (#3208) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d690e483..5ce38a82 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=3a14e00366399040a139c67dd5951177a8cb5695 +CPPLLAMA_VERSION?=b72942fac998672a79a1ae3c03b340f7e629980b # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp From a0e0804f25bb74543c6d524fa14eb0d6ad57a31f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:35:22 +0200 Subject: [PATCH 140/235] chore: :arrow_up: Update ggerganov/whisper.cpp to `81c999fe0a25c4ebbfef10ed8a1a96df9cfc10fd` (#3209) :arrow_up: Update ggerganov/whisper.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5ce38a82..9d09b917 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp -WHISPER_CPP_VERSION?=6eac06759b87b50132a01be019e9250a3ffc8969 +WHISPER_CPP_VERSION?=81c999fe0a25c4ebbfef10ed8a1a96df9cfc10fd # bert.cpp version 
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp From 63ee689f2169fb36dffbdb741a18179233d2b556 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 10 Aug 2024 01:02:22 +0200 Subject: [PATCH 141/235] chore(model-gallery): :arrow_up: update checksum (#3210) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 3119fae0..8daa39c6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -314,11 +314,11 @@ - https://huggingface.co/mradermacher/calme-2.3-legalkit-8b-i1-GGUF - https://huggingface.co/MaziyarPanahi/calme-2.3-legalkit-8b description: | - This model is an advanced iteration of the powerful meta-llama/Meta-Llama-3.1-8B-Instruct, specifically fine-tuned to enhance its capabilities in the legal domain. The fine-tuning process utilized a synthetically generated dataset derived from the French LegalKit, a comprehensive legal language resource. + This model is an advanced iteration of the powerful meta-llama/Meta-Llama-3.1-8B-Instruct, specifically fine-tuned to enhance its capabilities in the legal domain. The fine-tuning process utilized a synthetically generated dataset derived from the French LegalKit, a comprehensive legal language resource. - To create this specialized dataset, I used the NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO model in conjunction with Hugging Face's Inference Endpoint. This approach allowed for the generation of high-quality, synthetic data that incorporates Chain of Thought (CoT) and advanced reasoning in its responses. + To create this specialized dataset, I used the NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO model in conjunction with Hugging Face's Inference Endpoint. This approach allowed for the generation of high-quality, synthetic data that incorporates Chain of Thought (CoT) and advanced reasoning in its responses. - The resulting model combines the robust foundation of Llama-3.1-8B with tailored legal knowledge and enhanced reasoning capabilities. This makes it particularly well-suited for tasks requiring in-depth legal analysis, interpretation, and application of French legal concepts. + The resulting model combines the robust foundation of Llama-3.1-8B with tailored legal knowledge and enhanced reasoning capabilities. This makes it particularly well-suited for tasks requiring in-depth legal analysis, interpretation, and application of French legal concepts. 
overrides:
     parameters:
       model: calme-2.3-legalkit-8b.i1-Q4_K_M.gguf
@@ -4319,7 +4319,7 @@
     files:
       - filename: "Phi-3-medium-4k-instruct-Q4_K_M.gguf"
         uri: "huggingface://bartowski/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf"
-        sha256: 4e8d4258ed44562573c8984a045b0a4651c51e7e4d9d00a06c65cd2149ab4539
+        sha256: 6f05c97bc676dd1ec8d58e9a8795b4f5c809db771f6fc7bf48634c805face82c
 - !!merge <<: *phi-3
   name: "cream-phi-3-14b-v1"
   icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/AP4-OHepdqiqHj2KSi26M.gif

From 0c0bc18c94e8c000c7d2d47cb8bdac8b4c73e5cc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 10 Aug 2024 10:10:47 +0200
Subject: [PATCH 142/235] fix(diffusers): pin torch and torchvision (#1592)

Signed-off-by: Ettore Di Giacinto
---
 backend/python/diffusers/install.sh               | 5 -----
 backend/python/diffusers/requirements-hipblas.txt | 6 +++---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/backend/python/diffusers/install.sh b/backend/python/diffusers/install.sh
index b0b46a86..36443ef1 100755
--- a/backend/python/diffusers/install.sh
+++ b/backend/python/diffusers/install.sh
@@ -11,9 +11,4 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
 	EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
 fi
 
-# hipblas builds from nightly that needs pre-releases to work
-if [ "x${BUILD_PROFILE}" == "xhipblas" ]; then
-	EXTRA_PIP_INSTALL_FLAGS+=" --prerelease=allow"
-fi
-
 installRequirements
diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt
index 8c4c070c..fc9ea3b4 100644
--- a/backend/python/diffusers/requirements-hipblas.txt
+++ b/backend/python/diffusers/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.1/
-torch
-torchvision
+--extra-index-url https://download.pytorch.org/whl/rocm6.0
+torch==2.3.1+rocm6.0
+torchvision==0.18.1+rocm6.0
 diffusers
 opencv-python
 transformers

From 8627bc2dd4371f3a7629bf0b24a024b1c001d3f3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 10 Aug 2024 20:50:57 +0200
Subject: [PATCH 143/235] feat(explorer): relax token deletion with error threshold (#3211)

feat(explorer): relax token deletion with error threshold

Signed-off-by: Ettore Di Giacinto
---
 core/cli/explorer.go       |  9 ++++----
 core/explorer/discovery.go | 43 ++++++++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/core/cli/explorer.go b/core/cli/explorer.go
index 0fcde728..f3e3618d 100644
--- a/core/cli/explorer.go
+++ b/core/cli/explorer.go
@@ -10,9 +10,10 @@ import (
 )
 
 type ExplorerCMD struct {
-	Address           string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
-	PoolDatabase      string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
-	ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
+	Address                  string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
+	PoolDatabase             string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
+	ConnectionTimeout        string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
+	ConnectionErrorThreshold int 
`env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"` } func (e *ExplorerCMD) Run(ctx *cliContext.Context) error { @@ -26,7 +27,7 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error { if err != nil { return err } - ds := explorer.NewDiscoveryServer(db, dur) + ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold) go ds.Start(context.Background()) appHTTP := http.Explorer(db, ds) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go index 73281dc0..dc2b6e88 100644 --- a/core/explorer/discovery.go +++ b/core/explorer/discovery.go @@ -15,9 +15,11 @@ import ( type DiscoveryServer struct { sync.Mutex - database *Database - networkState *NetworkState - connectionTime time.Duration + database *Database + networkState *NetworkState + connectionTime time.Duration + failures map[string]int + errorThreshold int } type NetworkState struct { @@ -32,16 +34,20 @@ func (s *DiscoveryServer) NetworkState() *NetworkState { // NewDiscoveryServer creates a new DiscoveryServer with the given Database. // it keeps the db state in sync with the network state -func NewDiscoveryServer(db *Database, dur time.Duration) *DiscoveryServer { +func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer { if dur == 0 { dur = 50 * time.Second } + if failureThreshold == 0 { + failureThreshold = 3 + } return &DiscoveryServer{ database: db, connectionTime: dur, networkState: &NetworkState{ Networks: map[string]Network{}, }, + errorThreshold: failureThreshold, } } @@ -66,21 +72,21 @@ func (s *DiscoveryServer) runBackground() { n, err := p2p.NewNode(token) if err != nil { log.Err(err).Msg("Failed to create node") - s.database.Delete(token) + s.failedToken(token) continue } err = n.Start(c) if err != nil { log.Err(err).Msg("Failed to start node") - s.database.Delete(token) + s.failedToken(token) continue } ledger, err := n.Ledger() if err != nil { log.Err(err).Msg("Failed to start ledger") - s.database.Delete(token) + s.failedToken(token) continue } @@ -114,8 +120,27 @@ func (s *DiscoveryServer) runBackground() { } s.Unlock() } else { - log.Info().Any("network", token).Msg("No workers found in the network. 
Removing it from the database") - s.database.Delete(token) + s.failedToken(token) + } + } + + s.deleteFailedConnections() +} + +func (s *DiscoveryServer) failedToken(token string) { + s.Lock() + defer s.Unlock() + s.failures[token]++ +} + +func (s *DiscoveryServer) deleteFailedConnections() { + s.Lock() + defer s.Unlock() + for k, v := range s.failures { + if v > s.errorThreshold { + log.Info().Any("network", k).Msg("Network has been removed from the database") + s.database.Delete(k) + delete(s.failures, k) } } } From f3357a17b8012049c8dd26ea6bec8096ef1cbe73 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 11 Aug 2024 00:16:51 +0200 Subject: [PATCH 144/235] chore: :arrow_up: Update ggerganov/llama.cpp to `6e02327e8b7837358e0406bf90a4632e18e27846` (#3212) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9d09b917..ef38a460 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=b72942fac998672a79a1ae3c03b340f7e629980b +CPPLLAMA_VERSION?=6e02327e8b7837358e0406bf90a4632e18e27846 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp From 7ba4a78fcc87db6cd5a029ee3f8e11b516d4e144 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Aug 2024 00:59:58 +0200 Subject: [PATCH 145/235] fix(explorer): reset counter when network is active (#3213) Signed-off-by: Ettore Di Giacinto --- core/explorer/discovery.go | 1 + 1 file changed, 1 insertion(+) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go index dc2b6e88..5de4162f 100644 --- a/core/explorer/discovery.go +++ b/core/explorer/discovery.go @@ -118,6 +118,7 @@ func (s *DiscoveryServer) runBackground() { s.networkState.Networks[token] = Network{ Clusters: ledgerK, } + delete(s.failures, token) s.Unlock() } else { s.failedToken(token) From 74eaf024847b99a3edbc1fb90edb9d9234c4b3b8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Aug 2024 01:31:53 +0200 Subject: [PATCH 146/235] feat(diffusers): support flux models (#3129) * feat(diffusers): support flux models This adds support for FLUX models. 
For instance: https://huggingface.co/black-forest-labs/FLUX.1-dev Signed-off-by: Ettore Di Giacinto * feat(diffusers): support FluxTransformer2DModel Signed-off-by: Ettore Di Giacinto * Small fixups Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- backend/python/diffusers/backend.py | 39 +++++++++++++++++-- backend/python/diffusers/requirements-cpu.txt | 3 +- .../diffusers/requirements-cublas11.txt | 3 +- .../diffusers/requirements-cublas12.txt | 3 +- .../python/diffusers/requirements-hipblas.txt | 1 + .../python/diffusers/requirements-intel.txt | 3 +- 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index a348d290..8f420848 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -18,13 +18,13 @@ import backend_pb2_grpc import grpc from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ - EulerAncestralDiscreteScheduler + EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.utils import load_image, export_to_video from compel import Compel, ReturnedEmbeddingsType - -from transformers import CLIPTextModel +from optimum.quanto import freeze, qfloat8, quantize +from transformers import CLIPTextModel, T5EncoderModel from safetensors.torch import load_file _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -163,6 +163,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): modelFile = request.Model self.cfg_scale = 7 + self.PipelineType = request.PipelineType + if request.CFGScale != 0: self.cfg_scale = request.CFGScale @@ -244,6 +246,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): torch_dtype=torchType, use_safetensors=True, variant=variant) + elif request.PipelineType == "FluxPipeline": + self.pipe = FluxPipeline.from_pretrained( + request.Model, + torch_dtype=torch.bfloat16) + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "FluxTransformer2DModel": + dtype = torch.bfloat16 + # specify from environment or default to "ChuckMcSneed/FLUX.1-dev" + bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev") + + transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype) + quantize(transformer, weights=qfloat8) + freeze(transformer) + text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype) + quantize(text_encoder_2, weights=qfloat8) + freeze(text_encoder_2) + + self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype) + self.pipe.transformer = transformer + self.pipe.text_encoder_2 = text_encoder_2 + + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip @@ -399,6 +425,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): request.seed ) + if self.PipelineType == "FluxPipeline": + kwargs["max_sequence_length"] = 256 + + if self.PipelineType == "FluxTransformer2DModel": + kwargs["output_type"] = "pil" + kwargs["generator"] = torch.Generator("cpu").manual_seed(0) + if self.img2vid: # Load the conditioning image image = 
load_image(request.src) diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt index e46a53e5..235bb57e 100644 --- a/backend/python/diffusers/requirements-cpu.txt +++ b/backend/python/diffusers/requirements-cpu.txt @@ -5,4 +5,5 @@ accelerate compel peft sentencepiece -torch \ No newline at end of file +torch +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt index df28b821..40e718cb 100644 --- a/backend/python/diffusers/requirements-cublas11.txt +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -6,4 +6,5 @@ transformers accelerate compel peft -sentencepiece \ No newline at end of file +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt index b0685a62..3bcc5397 100644 --- a/backend/python/diffusers/requirements-cublas12.txt +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -5,4 +5,5 @@ transformers accelerate compel peft -sentencepiece \ No newline at end of file +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index fc9ea3b4..17cf7249 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -8,3 +8,4 @@ accelerate compel peft sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 77f9e674..1cc2e2a2 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -10,4 +10,5 @@ transformers accelerate compel peft -sentencepiece \ No newline at end of file +sentencepiece +optimum-quanto \ No newline at end of file From 9f61ac8accf41fde4c52c27e02398a6563e99bdb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Aug 2024 10:19:02 +0200 Subject: [PATCH 147/235] models(gallery): add flux.1-dev and flux.1-schnell (#3215) Signed-off-by: Ettore Di Giacinto --- gallery/flux.yaml | 14 ++++++++++++++ gallery/index.yaml | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 gallery/flux.yaml diff --git a/gallery/flux.yaml b/gallery/flux.yaml new file mode 100644 index 00000000..bb75b53b --- /dev/null +++ b/gallery/flux.yaml @@ -0,0 +1,14 @@ +--- +name: "flux" + +config_file: | + backend: diffusers + f16: true + low_vram: true + step: 25 + + diffusers: + cuda: true + enable_parameters: num_inference_steps + pipeline_type: FluxPipeline + cfg_scale: 0 diff --git a/gallery/index.yaml b/gallery/index.yaml index 8daa39c6..cca968bf 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -4942,6 +4942,44 @@ - sd-3 - gpu url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master" +- &flux + name: flux.1-dev + license: flux-1-dev-non-commercial-license + description: | + FLUX.1 [dev] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post. + Key Features + Cutting-edge output quality, second only to our state-of-the-art model FLUX.1 [pro]. + Competitive prompt following, matching the performance of closed source alternatives . 
+ Trained using guidance distillation, making FLUX.1 [dev] more efficient. + Open weights to drive new scientific research, and empower artists to develop innovative workflows. + Generated outputs can be used for personal, scientific, and commercial purposes as described in the flux-1-dev-non-commercial-license. + urls: + - https://huggingface.co/black-forest-labs/FLUX.1-dev + tags: + - text-to-image + - flux + - python + - gpu + url: "github:mudler/LocalAI/gallery/flux.yaml@master" + overrides: + parameters: + model: ChuckMcSneed/FLUX.1-dev +- !!merge <<: *flux + name: flux.1-schnell + license: apache-2 + icon: https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/schnell_grid.jpeg + description: | + FLUX.1 [schnell] is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. For more information, please read our blog post. + Key Features + + Cutting-edge output quality and competitive prompt following, matching the performance of closed source alternatives. + Trained using latent adversarial diffusion distillation, FLUX.1 [schnell] can generate high-quality images in only 1 to 4 steps. + Released under the apache-2.0 licence, the model can be used for personal, scientific, and commercial purposes. + urls: + - https://huggingface.co/black-forest-labs/FLUX.1-schnell + overrides: + parameters: + model: black-forest-labs/FLUX.1-schnell - &whisper ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" From c4534cd90800463b83d3231be184e0f06c3bdcb6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Aug 2024 10:46:17 +0200 Subject: [PATCH 148/235] chore(deps): update edgevpn (#3214) * chore(deps): update edgevpn Signed-off-by: Ettore Di Giacinto * fix: initialize failure map Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/explorer/discovery.go | 9 +++++---- go.mod | 35 +++++++++++++++++++------------- go.sum | 41 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go index 5de4162f..6a29442f 100644 --- a/core/explorer/discovery.go +++ b/core/explorer/discovery.go @@ -15,10 +15,10 @@ import ( type DiscoveryServer struct { sync.Mutex - database *Database - networkState *NetworkState - connectionTime time.Duration - failures map[string]int + database *Database + networkState *NetworkState + connectionTime time.Duration + failures map[string]int errorThreshold int } @@ -48,6 +48,7 @@ func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) * Networks: map[string]Network{}, }, errorThreshold: failureThreshold, + failures: make(map[string]int), } } diff --git a/go.mod b/go.mod index fad40e01..b35db1b1 100644 --- a/go.mod +++ b/go.mod @@ -29,15 +29,15 @@ require ( github.com/jaypipes/ghw v0.12.0 github.com/joho/godotenv v1.5.1 github.com/klauspost/cpuid/v2 v2.2.8 - github.com/libp2p/go-libp2p v0.35.2 + github.com/libp2p/go-libp2p v0.35.4 github.com/mholt/archiver/v3 v3.5.1 github.com/microcosm-cc/bluemonday v1.0.26 - github.com/mudler/edgevpn v0.26.2 + github.com/mudler/edgevpn v0.27.0 github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c github.com/mudler/go-stable-diffusion v0.0.0-20240429204715-4a3cd6aeae6f github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20240606155928-41c9013fa46a - github.com/onsi/ginkgo/v2 v2.19.0 - github.com/onsi/gomega v1.33.1 + github.com/onsi/ginkgo/v2 v2.20.0 + github.com/onsi/gomega v1.34.1 
github.com/ory/dockertest/v3 v3.10.0 github.com/otiai10/openaigo v1.7.0 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 @@ -64,8 +64,11 @@ require ( ) require ( + github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0 // indirect + github.com/labstack/echo/v4 v4.12.0 // indirect + github.com/labstack/gommon v0.4.2 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pion/datachannel v1.5.6 // indirect @@ -84,6 +87,10 @@ require ( github.com/pion/transport/v2 v2.2.5 // indirect github.com/pion/turn/v2 v2.1.6 // indirect github.com/pion/webrtc/v3 v3.2.40 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/urfave/cli/v2 v2.27.3 // indirect + github.com/valyala/fasttemplate v1.2.2 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.uber.org/mock v0.4.0 // indirect ) @@ -146,7 +153,7 @@ require ( github.com/google/btree v1.1.2 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect + github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/gorilla/websocket v1.5.3 // indirect @@ -274,15 +281,15 @@ require ( go.uber.org/fx v1.22.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.24.0 // indirect - golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect - golang.org/x/mod v0.18.0 // indirect - golang.org/x/net v0.26.0 // indirect - golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/term v0.21.0 // indirect - golang.org/x/text v0.16.0 // indirect - golang.org/x/tools v0.22.0 // indirect + golang.org/x/crypto v0.26.0 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/mod v0.20.0 // indirect + golang.org/x/net v0.28.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.24.0 // indirect + golang.org/x/term v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect + golang.org/x/tools v0.24.0 // indirect golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect golang.zx2c4.com/wireguard/windows v0.5.3 // indirect diff --git a/go.sum b/go.sum index 84dd09e6..5c035169 100644 --- a/go.sum +++ b/go.sum @@ -90,6 +90,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creachadair/mds v0.7.0 h1:7QoYqiPl18C0h7CLq9z9/qUH5Vr62V9677yJZHGLoQM= github.com/creachadair/mds v0.7.0/go.mod h1:4vrFYUzTXMJpMBU+OA292I6IUxKWCCfZkgXg+/kBZMo= github.com/creachadair/otp v0.4.2 
h1:ngNMaD6Tzd7UUNRFyed7ykZFn/Wr5sSs5ffqZWm9pu8= @@ -254,6 +256,8 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= @@ -357,6 +361,10 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/labstack/echo/v4 v4.12.0 h1:IKpw49IMryVB2p1a4dzwlhP1O2Tf2E0Ir/450lH+kI0= +github.com/labstack/echo/v4 v4.12.0/go.mod h1:UP9Cr2DJXbOK3Kr9ONYzNowSh7HP0aG0ShAyycHSJvM= +github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= +github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2 h1:hRGSmZu7j271trc9sneMrpOW7GN5ngLm8YUZIPzf394= github.com/lib/pq v0.0.0-20180327071824-d34b9ff171c2/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= @@ -367,6 +375,8 @@ github.com/libp2p/go-flow-metrics v0.1.0 h1:0iPhMI8PskQwzh57jB9WxIuIOQ0r+15PChFG github.com/libp2p/go-flow-metrics v0.1.0/go.mod h1:4Xi8MX8wj5aWNDAZttg6UPmc0ZrnFNsMtpsYUClFtro= github.com/libp2p/go-libp2p v0.35.2 h1:287oHbuplkrLdAF+syB0n/qDgd50AUBtEODqS0e0HDs= github.com/libp2p/go-libp2p v0.35.2/go.mod h1:RKCDNt30IkFipGL0tl8wQW/3zVWEGFUZo8g2gAKxwjU= +github.com/libp2p/go-libp2p v0.35.4 h1:FDiBUYLkueFwsuNJUZaxKRdpKvBOWU64qQPL768bSeg= +github.com/libp2p/go-libp2p v0.35.4/go.mod h1:RKCDNt30IkFipGL0tl8wQW/3zVWEGFUZo8g2gAKxwjU= github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94= github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= github.com/libp2p/go-libp2p-kad-dht v0.25.2 h1:FOIk9gHoe4YRWXTu8SY9Z1d0RILol0TrtApsMDPjAVQ= @@ -459,6 +469,8 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mudler/edgevpn v0.26.2 h1:OK4jfk7sYjuU7vCh+geUJk38lsxRgMk+EdsS9s0hioE= github.com/mudler/edgevpn v0.26.2/go.mod h1:lplntB9N6LzGNqeSM3XHCq8kyDPsNhY3jqEbWGD2WaQ= +github.com/mudler/edgevpn v0.27.0 h1:FnBVzPs098DTgbUkiwm22n30hmEVBAq+PVpXanqx6qo= +github.com/mudler/edgevpn v0.27.0/go.mod h1:Hwvr+i+dePgn/Yh+EMMvqcw9ByUCLAWD9TgYtJYV95Y= github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d h1:8udOFrDf/I83JL0/u22j6U6Q9z9LoSdby2a/DWdd0/s= github.com/mudler/go-piper v0.0.0-20240315144837-9d0100873a7d/go.mod 
h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig= github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c h1:CI5uGwqBpN8N7BrSKC+nmdfw+9nPQIDyjHHlaIiitZI= @@ -516,11 +528,14 @@ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/ginkgo/v2 v2.20.0 h1:PE84V2mHqoT1sglvHc8ZdQtPcwmvvt29WLEEO3xmdZw= +github.com/onsi/ginkgo/v2 v2.20.0/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= @@ -639,6 +654,7 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww= github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sashabaranov/go-openai v1.26.2 h1:cVlQa3gn3eYqNXRW03pPlpy6zLG52EU4g0FrWXc0EFI= github.com/sashabaranov/go-openai v1.26.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= @@ -736,11 +752,16 @@ github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= +github.com/urfave/cli v1.22.12 h1:igJgVw1JdKH+trcLWLeLwZjU9fEfPesQ+9/e4MQ44S8= github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= +github.com/urfave/cli/v2 v2.27.3 h1:/POWahRmdh7uztQ3CYnaDddk0Rm90PyOgIxgW2rr41M= +github.com/urfave/cli/v2 v2.27.3/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.55.0 h1:Zkefzgt6a7+bVKHnu/YaYSOPfNYNisSVBo/unVCf8k8= github.com/valyala/fasthttp v1.55.0/go.mod h1:NkY9JtkrpPKmgwV3HTaS2HWaJss9RSIsRVfcxxoHiOM= +github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= +github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v1.0.0 
h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vbatts/tar-split v0.11.3 h1:hLFqsOLQ1SsppQNTMpkpPXClLDfC2A3Zgy9OUU+RVck= @@ -765,6 +786,8 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17 github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -833,9 +856,13 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= @@ -852,6 +879,8 @@ golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -885,6 +914,8 @@ golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= golang.org/x/net 
v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -902,6 +933,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -952,6 +985,8 @@ golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= @@ -967,6 +1002,8 @@ golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -981,6 +1018,8 @@ golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time 
v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= @@ -1008,6 +1047,8 @@ golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From a92b3b13e9f68c41dc64cdaac858c49921cc3422 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 11 Aug 2024 11:22:00 +0200 Subject: [PATCH 149/235] chore: fix gosum missing entry --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 5c035169..47fd4c06 100644 --- a/go.sum +++ b/go.sum @@ -535,6 +535,7 @@ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1y github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= From e30114a4a42aeb55a1114707b313819b85d60a11 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 11 Aug 2024 23:46:30 +0200 Subject: [PATCH 150/235] chore: :arrow_up: Update ggerganov/llama.cpp to `4134999e01f31256b15342b41c4de9e2477c4a6c` (#3218) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef38a460..b5b2a435 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=6e02327e8b7837358e0406bf90a4632e18e27846 +CPPLLAMA_VERSION?=4134999e01f31256b15342b41c4de9e2477c4a6c # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp From 7137c32f8f2eeba0eb101473f2700cbb76b37b46 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 12 Aug 2024 09:56:31 +0200 Subject: [PATCH 151/235] models(gallery): add infinity-instruct-7m-gen-llama3_1-70b (#3220) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index cca968bf..bc23d1b6 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -488,6 +488,20 @@ - filename: 
Kumiho-v1-rp-UwU-8B-gguf-q4_k_m.gguf
         sha256: a1deb46675418277cf785a406cd1508fec556ff6e4d45d2231eb2a82986d52d0
         uri: huggingface://juvi21/Kumiho-v1-rp-UwU-8B-GGUF/Kumiho-v1-rp-UwU-8B-gguf-q4_k_m.gguf
+- !!merge <<: *llama31
+  name: "infinity-instruct-7m-gen-llama3_1-70b"
+  icon: https://huggingface.co/BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B/resolve/main/fig/Bk3NbjnJko51MTx1ZCScT2sqnGg.png
+  urls:
+    - https://huggingface.co/mradermacher/Infinity-Instruct-7M-Gen-Llama3_1-70B-GGUF
+  description: |
+    Infinity-Instruct-7M-Gen-Llama3.1-70B is an opensource supervised instruction tuning model without reinforcement learning from human feedback (RLHF). This model is just finetuned on Infinity-Instruct-7M and Infinity-Instruct-Gen and showing favorable results on AlpacaEval 2.0 and arena-hard compared to GPT4.
+  overrides:
+    parameters:
+      model: Infinity-Instruct-7M-Gen-Llama3_1-70B.Q4_K_M.gguf
+  files:
+    - filename: Infinity-Instruct-7M-Gen-Llama3_1-70B.Q4_K_M.gguf
+      sha256: f4379ab4d7140da0510886073375ca820ea9ac4ad9d3c20e17ed05156bd29697
+      uri: huggingface://mradermacher/Infinity-Instruct-7M-Gen-Llama3_1-70B-GGUF/Infinity-Instruct-7M-Gen-Llama3_1-70B.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"

From 4dfa0853392bb1bb2eda86743265d5d3754536a7 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 12 Aug 2024 09:59:17 +0200
Subject: [PATCH 152/235] models(gallery): add cathallama-70b (#3221)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index bc23d1b6..eb7515ba 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -502,6 +502,33 @@
     - filename: Infinity-Instruct-7M-Gen-Llama3_1-70B.Q4_K_M.gguf
       sha256: f4379ab4d7140da0510886073375ca820ea9ac4ad9d3c20e17ed05156bd29697
       uri: huggingface://mradermacher/Infinity-Instruct-7M-Gen-Llama3_1-70B-GGUF/Infinity-Instruct-7M-Gen-Llama3_1-70B.Q4_K_M.gguf
+- !!merge <<: *llama31
+  name: "cathallama-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/649dc85249ae3a68334adcc6/KxaiZ7rDKkYlix99O9j5H.png
+  urls:
+    - https://huggingface.co/gbueno86/Cathallama-70B
+    - https://huggingface.co/mradermacher/Cathallama-70B-GGUF
+  description: |
+    Notable Performance
+
+    9% overall success rate increase on MMLU-PRO over LLaMA 3.1 70b
+    Strong performance in MMLU-PRO categories overall
+    Great performance during manual testing
+
+    Creation workflow
+
+    Models merged
+
+    meta-llama/Meta-Llama-3.1-70B-Instruct
+    turboderp/Cat-Llama-3-70B-instruct
+    Nexusflow/Athene-70B
+  overrides:
+    parameters:
+      model: Cathallama-70B.Q4_K_M.gguf
+  files:
+    - filename: Cathallama-70B.Q4_K_M.gguf
+      sha256: 7bbac0849a8da82e7912a493a15fa07d605f1ffbe7337a322f17e09195511022
+      uri: huggingface://mradermacher/Cathallama-70B-GGUF/Cathallama-70B.Q4_K_M.gguf
 - &deepseek
   ## Deepseek
   url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"

From 9729d2ae37a4913e1d57d9006cd9e8359983c932 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 12 Aug 2024 19:25:44 +0200
Subject: [PATCH 153/235] feat(explorer): make it possible to run sync in a separate process (#3224)

Signed-off-by: Ettore Di Giacinto
---
 core/cli/explorer.go                      | 19 ++++--
 core/explorer/database.go                 | 59 +++++++++++++++--------
 core/explorer/discovery.go                | 49 ++++++-------------
 core/http/endpoints/explorer/dashboard.go | 11 ++---
 core/http/explorer.go                     |  4 +-
 core/http/routes/explorer.go              |  4 +-
 core/p2p/p2p.go                           |  1 +
 go.mod                                    |  1 +
 go.sum                                    |  2 +
 9 
files changed, 83 insertions(+), 67 deletions(-)

diff --git a/core/cli/explorer.go b/core/cli/explorer.go
index f3e3618d..67d25304 100644
--- a/core/cli/explorer.go
+++ b/core/cli/explorer.go
@@ -14,6 +14,9 @@ type ExplorerCMD struct {
 	PoolDatabase             string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
 	ConnectionTimeout        string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
 	ConnectionErrorThreshold int    `env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"`
+
+	WithSync bool `env:"LOCALAI_WITH_SYNC,WITH_SYNC" default:"false" help:"Enable sync with the network" group:"api"`
+	OnlySync bool `env:"LOCALAI_ONLY_SYNC,ONLY_SYNC" default:"false" help:"Only sync with the network" group:"api"`
 }
 
 func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
@@ -27,10 +30,20 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
 	if err != nil {
 		return err
 	}
-	ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold)
-	go ds.Start(context.Background())
 
-	appHTTP := http.Explorer(db, ds)
+	if e.WithSync {
+		ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold)
+		go ds.Start(context.Background(), true)
+	}
+
+	if e.OnlySync {
+		ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold)
+		ctx := context.Background()
+
+		return ds.Start(ctx, false)
+	}
+
+	appHTTP := http.Explorer(db)
 	return appHTTP.Listen(e.Address)
 }
diff --git a/core/explorer/database.go b/core/explorer/database.go
index 8535140c..e24de0aa 100644
--- a/core/explorer/database.go
+++ b/core/explorer/database.go
@@ -7,58 +7,83 @@ import (
 	"os"
 	"sort"
 	"sync"
+
+	"github.com/gofrs/flock"
 )
 
 // Database is a simple JSON database for storing and retrieving p2p network tokens and a name and description.
 type Database struct {
-	sync.RWMutex
-	path string
-	data map[string]TokenData
+	path  string
+	data  map[string]TokenData
+	flock *flock.Flock
+	sync.Mutex
 }
 
 // TokenData is a p2p network token with a name and description.
 type TokenData struct {
 	Name        string `json:"name"`
 	Description string `json:"description"`
+	Clusters    []ClusterData
+	Failures    int
+}
+
+type ClusterData struct {
+	Workers   []string
+	Type      string
+	NetworkID string
 }
 
 // NewDatabase creates a new Database with the given path.
 func NewDatabase(path string) (*Database, error) {
+	fileLock := flock.New(path + ".lock")
 	db := &Database{
-		data: make(map[string]TokenData),
-		path: path,
+		data:  make(map[string]TokenData),
+		path:  path,
+		flock: fileLock,
 	}
 	return db, db.load()
}
 
 // Get retrieves a Token from the Database by its token.
 func (db *Database) Get(token string) (TokenData, bool) {
-	db.RLock()
-	defer db.RUnlock()
+	db.flock.Lock() // we are making sure that the file is not being written to
+	defer db.flock.Unlock()
+	db.Lock() // we are making sure that it is safe if called by another instance in the same process
+	defer db.Unlock()
+	db.load()
 	t, ok := db.data[token]
 	return t, ok
 }
 
 // Set stores a Token in the Database by its token.
 func (db *Database) Set(token string, t TokenData) error {
+	db.flock.Lock()
+	defer db.flock.Unlock()
 	db.Lock()
+	defer db.Unlock()
+	db.load()
 	db.data[token] = t
-	db.Unlock()
 
-	return db.Save()
+	return db.save()
 }
 
 // Delete removes a Token from the Database by its token. 
func (db *Database) Delete(token string) error { + db.flock.Lock() + defer db.flock.Unlock() db.Lock() + defer db.Unlock() + db.load() delete(db.data, token) - db.Unlock() - return db.Save() + return db.save() } func (db *Database) TokenList() []string { - db.RLock() - defer db.RUnlock() + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() tokens := []string{} for k := range db.data { tokens = append(tokens, k) @@ -74,9 +99,6 @@ func (db *Database) TokenList() []string { // load reads the Database from disk. func (db *Database) load() error { - db.Lock() - defer db.Unlock() - if _, err := os.Stat(db.path); os.IsNotExist(err) { return nil } @@ -91,10 +113,7 @@ func (db *Database) load() error { } // Save writes the Database to disk. -func (db *Database) Save() error { - db.RLock() - defer db.RUnlock() - +func (db *Database) save() error { // Marshal db.data into JSON // Write the JSON to the file f, err := os.Create(db.path) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go index 6a29442f..fe6470cb 100644 --- a/core/explorer/discovery.go +++ b/core/explorer/discovery.go @@ -16,22 +16,10 @@ import ( type DiscoveryServer struct { sync.Mutex database *Database - networkState *NetworkState connectionTime time.Duration - failures map[string]int errorThreshold int } -type NetworkState struct { - Networks map[string]Network -} - -func (s *DiscoveryServer) NetworkState() *NetworkState { - s.Lock() - defer s.Unlock() - return s.networkState -} - // NewDiscoveryServer creates a new DiscoveryServer with the given Database. // it keeps the db state in sync with the network state func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer { @@ -44,11 +32,7 @@ func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) * return &DiscoveryServer{ database: db, connectionTime: dur, - networkState: &NetworkState{ - Networks: map[string]Network{}, - }, errorThreshold: failureThreshold, - failures: make(map[string]int), } } @@ -116,10 +100,10 @@ func (s *DiscoveryServer) runBackground() { if hasWorkers { s.Lock() - s.networkState.Networks[token] = Network{ - Clusters: ledgerK, - } - delete(s.failures, token) + data, _ := s.database.Get(token) + (&data).Clusters = ledgerK + (&data).Failures = 0 + s.database.Set(token, data) s.Unlock() } else { s.failedToken(token) @@ -132,27 +116,23 @@ func (s *DiscoveryServer) runBackground() { func (s *DiscoveryServer) failedToken(token string) { s.Lock() defer s.Unlock() - s.failures[token]++ + data, _ := s.database.Get(token) + (&data).Failures++ + s.database.Set(token, data) } func (s *DiscoveryServer) deleteFailedConnections() { s.Lock() defer s.Unlock() - for k, v := range s.failures { - if v > s.errorThreshold { - log.Info().Any("network", k).Msg("Network has been removed from the database") - s.database.Delete(k) - delete(s.failures, k) + for _, t := range s.database.TokenList() { + data, _ := s.database.Get(t) + if data.Failures > s.errorThreshold { + log.Info().Any("token", t).Msg("Token has been removed from the database") + s.database.Delete(t) } } } -type ClusterData struct { - Workers []string - Type string - NetworkID string -} - func (s *DiscoveryServer) retrieveNetworkData(c context.Context, ledger *blockchain.Ledger, networkData chan ClusterData) { clusters := map[string]ClusterData{} @@ -217,7 +197,7 @@ func (s *DiscoveryServer) retrieveNetworkData(c context.Context, ledger *blockch } // Start the discovery server. 
This is meant to be run in to a goroutine. -func (s *DiscoveryServer) Start(ctx context.Context) error { +func (s *DiscoveryServer) Start(ctx context.Context, keepRunning bool) error { for { select { case <-ctx.Done(): @@ -225,6 +205,9 @@ func (s *DiscoveryServer) Start(ctx context.Context) error { default: // Collect data s.runBackground() + if !keepRunning { + return nil + } } } } diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go index 7cd9f3c9..9c731d9a 100644 --- a/core/http/endpoints/explorer/dashboard.go +++ b/core/http/endpoints/explorer/dashboard.go @@ -11,7 +11,6 @@ import ( func Dashboard() func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - summary := fiber.Map{ "Title": "LocalAI API - " + internal.PrintableVersion(), "Version": internal.PrintableVersion(), @@ -34,26 +33,24 @@ type AddNetworkRequest struct { } type Network struct { - explorer.Network explorer.TokenData Token string `json:"token"` } -func ShowNetworks(db *explorer.Database, ds *explorer.DiscoveryServer) func(*fiber.Ctx) error { +func ShowNetworks(db *explorer.Database) func(*fiber.Ctx) error { return func(c *fiber.Ctx) error { - networkState := ds.NetworkState() results := []Network{} - for token, network := range networkState.Networks { + for _, token := range db.TokenList() { networkData, exists := db.Get(token) // get the token data hasWorkers := false - for _, cluster := range network.Clusters { + for _, cluster := range networkData.Clusters { if len(cluster.Workers) > 0 { hasWorkers = true break } } if exists && hasWorkers { - results = append(results, Network{Network: network, TokenData: networkData, Token: token}) + results = append(results, Network{TokenData: networkData, Token: token}) } } diff --git a/core/http/explorer.go b/core/http/explorer.go index 608ecdb5..bdcb93b1 100644 --- a/core/http/explorer.go +++ b/core/http/explorer.go @@ -10,7 +10,7 @@ import ( "github.com/mudler/LocalAI/core/http/routes" ) -func Explorer(db *explorer.Database, discoveryServer *explorer.DiscoveryServer) *fiber.App { +func Explorer(db *explorer.Database) *fiber.App { fiberCfg := fiber.Config{ Views: renderEngine(), @@ -22,7 +22,7 @@ func Explorer(db *explorer.Database, discoveryServer *explorer.DiscoveryServer) app := fiber.New(fiberCfg) - routes.RegisterExplorerRoutes(app, db, discoveryServer) + routes.RegisterExplorerRoutes(app, db) httpFS := http.FS(embedDirStatic) diff --git a/core/http/routes/explorer.go b/core/http/routes/explorer.go index b3c0d40b..960b476b 100644 --- a/core/http/routes/explorer.go +++ b/core/http/routes/explorer.go @@ -6,8 +6,8 @@ import ( "github.com/mudler/LocalAI/core/http/endpoints/explorer" ) -func RegisterExplorerRoutes(app *fiber.App, db *coreExplorer.Database, ds *coreExplorer.DiscoveryServer) { +func RegisterExplorerRoutes(app *fiber.App, db *coreExplorer.Database) { app.Get("/", explorer.Dashboard()) app.Post("/network/add", explorer.AddNetwork(db)) - app.Get("/networks", explorer.ShowNetworks(db, ds)) + app.Get("/networks", explorer.ShowNetworks(db)) } diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go index 37b892d9..bfa12287 100644 --- a/core/p2p/p2p.go +++ b/core/p2p/p2p.go @@ -236,6 +236,7 @@ func ensureService(ctx context.Context, n *node.Node, nd *NodeData, sserv string if ndService, found := service[nd.Name]; !found { if !nd.IsOnline() { // if node is offline and not present, do nothing + zlog.Debug().Msgf("Node %s is offline", nd.ID) return } newCtxm, cancel := context.WithCancel(ctx) diff --git a/go.mod b/go.mod index 
b35db1b1..dcece45c 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,7 @@ require ( github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-viper/mapstructure/v2 v2.0.0 // indirect + github.com/gofrs/flock v0.12.1 // indirect github.com/labstack/echo/v4 v4.12.0 // indirect github.com/labstack/gommon v0.4.2 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect diff --git a/go.sum b/go.sum index 47fd4c06..db47c36b 100644 --- a/go.sum +++ b/go.sum @@ -204,6 +204,8 @@ github.com/gofiber/template/html/v2 v2.1.2 h1:wkK/mYJ3nIhongTkG3t0QgV4ADdgOYJYVS github.com/gofiber/template/html/v2 v2.1.2/go.mod h1:E98Z/FzvpaSib06aWEgYk6GXNf3ctoyaJH8yW5ay5ak= github.com/gofiber/utils v1.1.0 h1:vdEBpn7AzIUJRhe+CiTOJdUcTg4Q9RK+pEa0KPbLdrM= github.com/gofiber/utils v1.1.0/go.mod h1:poZpsnhBykfnY1Mc0KeEa6mSHrS3dV0+oBWyeQmb2e0= +github.com/gofrs/flock v0.12.1 h1:MTLVXXHf8ekldpJk3AKicLij9MdwOWkZ+a/jHHZby9E= +github.com/gofrs/flock v0.12.1/go.mod h1:9zxTsyu5xtJ9DK+1tFZyibEV7y3uwDxPPfbxeeHCoD0= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= From ae4b67fb560e0d048d1c7d8884e958ddb0eefa33 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:00:30 +0000 Subject: [PATCH 154/235] chore(deps): Bump llama-index from 0.10.61 to 0.10.65 in /examples/langchain-chroma (#3225) chore(deps): Bump llama-index in /examples/langchain-chroma Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.61 to 0.10.65. - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.10.61...v0.10.65) --- updated-dependencies: - dependency-name: llama-index dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 535c6537..98f7855c 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.12 openai==1.39.0 chromadb==0.5.5 -llama-index==0.10.61 \ No newline at end of file +llama-index==0.10.65 \ No newline at end of file From bd57ebf042f197272ada0216081639b8b6770d2e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:13:01 +0000 Subject: [PATCH 155/235] chore(deps): Bump langchain-community from 0.2.9 to 0.2.11 in /examples/langchain/langchainpy-localai-example (#3230) chore(deps): Bump langchain-community Bumps [langchain-community](https://github.com/langchain-ai/langchain) from 0.2.9 to 0.2.11. - [Release notes](https://github.com/langchain-ai/langchain/releases) - [Commits](https://github.com/langchain-ai/langchain/compare/langchain-community==0.2.9...langchain-community==0.2.11) --- updated-dependencies: - dependency-name: langchain-community dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 414a1b27..c46a794a 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -11,7 +11,7 @@ frozenlist==1.4.1 greenlet==3.0.3 idna==3.7 langchain==0.2.12 -langchain-community==0.2.9 +langchain-community==0.2.11 marshmallow==3.21.3 marshmallow-enum==1.5.1 multidict==6.0.5 From 710f566553d1374087fff04990bf4ce5c153dc8d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 22:36:11 +0000 Subject: [PATCH 156/235] chore(deps): Bump attrs from 23.2.0 to 24.2.0 in /examples/langchain/langchainpy-localai-example (#3232) chore(deps): Bump attrs Bumps [attrs](https://github.com/sponsors/hynek) from 23.2.0 to 24.2.0. - [Commits](https://github.com/sponsors/hynek/commits) --- updated-dependencies: - dependency-name: attrs dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index c46a794a..68031d75 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -1,7 +1,7 @@ aiohttp==3.10.2 aiosignal==1.3.1 async-timeout==4.0.3 -attrs==23.2.0 +attrs==24.2.0 certifi==2024.7.4 charset-normalizer==3.3.2 colorama==0.4.6 From 121ffe61c5dcccfe8237db338e04578ae7587386 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 23:31:45 +0000 Subject: [PATCH 157/235] chore(deps): Bump pyyaml from 6.0.1 to 6.0.2 in /examples/langchain/langchainpy-localai-example (#3231) chore(deps): Bump pyyaml Bumps [pyyaml](https://github.com/yaml/pyyaml) from 6.0.1 to 6.0.2. - [Release notes](https://github.com/yaml/pyyaml/releases) - [Changelog](https://github.com/yaml/pyyaml/blob/main/CHANGES) - [Commits](https://github.com/yaml/pyyaml/compare/6.0.1...6.0.2) --- updated-dependencies: - dependency-name: pyyaml dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain/langchainpy-localai-example/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt index 68031d75..493f2687 100644 --- a/examples/langchain/langchainpy-localai-example/requirements.txt +++ b/examples/langchain/langchainpy-localai-example/requirements.txt @@ -22,7 +22,7 @@ openai==1.39.0 openapi-schema-pydantic==1.2.4 packaging>=23.2 pydantic==2.8.2 -PyYAML==6.0.1 +PyYAML==6.0.2 requests==2.32.3 SQLAlchemy==2.0.32 tenacity==8.5.0 From 83ffd626dc93be6edee3a01b95338657f6b49a24 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 00:23:31 +0000 Subject: [PATCH 158/235] chore(deps): Bump llama-index from 0.10.59 to 0.10.65 in /examples/chainlit (#3238) chore(deps): Bump llama-index in /examples/chainlit Bumps [llama-index](https://github.com/run-llama/llama_index) from 0.10.59 to 0.10.65. - [Release notes](https://github.com/run-llama/llama_index/releases) - [Changelog](https://github.com/run-llama/llama_index/blob/main/CHANGELOG.md) - [Commits](https://github.com/run-llama/llama_index/compare/v0.10.59...v0.10.65) --- updated-dependencies: - dependency-name: llama-index dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/chainlit/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt index 52e2b8a2..9e8b3b31 100644 --- a/examples/chainlit/requirements.txt +++ b/examples/chainlit/requirements.txt @@ -1,4 +1,4 @@ -llama_index==0.10.59 +llama_index==0.10.65 requests==2.32.3 weaviate_client==4.6.7 transformers From cd385c2720c41534c4900d470af09fe63d38e398 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 13 Aug 2024 02:59:04 +0200 Subject: [PATCH 159/235] chore: :arrow_up: Update ggerganov/llama.cpp to `fc4ca27b25464a11b3b86c9dbb5b6ed6065965c2` (#3240) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b5b2a435..40ddcc6d 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=4134999e01f31256b15342b41c4de9e2477c4a6c +CPPLLAMA_VERSION?=fc4ca27b25464a11b3b86c9dbb5b6ed6065965c2 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp From 71f3fa653aa1a599af02505440f2cafcd1ca33c2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 01:12:11 +0000 Subject: [PATCH 160/235] chore(deps): Bump openai from 1.39.0 to 1.40.5 in /examples/langchain-chroma (#3241) chore(deps): Bump openai in /examples/langchain-chroma Bumps [openai](https://github.com/openai/openai-python) from 1.39.0 to 1.40.5. 
- [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.39.0...v1.40.5) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/langchain-chroma/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/langchain-chroma/requirements.txt b/examples/langchain-chroma/requirements.txt index 98f7855c..16701ca3 100644 --- a/examples/langchain-chroma/requirements.txt +++ b/examples/langchain-chroma/requirements.txt @@ -1,4 +1,4 @@ langchain==0.2.12 -openai==1.39.0 +openai==1.40.5 chromadb==0.5.5 llama-index==0.10.65 \ No newline at end of file From 89979da33f0738990b791b7ca36fdc12552153ce Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 13 Aug 2024 04:01:26 +0200 Subject: [PATCH 161/235] chore: :arrow_up: Update ggerganov/whisper.cpp to `22fcd5fd110ba1ff592b4e23013d870831756259` (#3239) :arrow_up: Update ggerganov/whisper.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 40ddcc6d..c57a8cf2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp -WHISPER_CPP_VERSION?=81c999fe0a25c4ebbfef10ed8a1a96df9cfc10fd +WHISPER_CPP_VERSION?=22fcd5fd110ba1ff592b4e23013d870831756259 # bert.cpp version BERT_REPO?=https://github.com/go-skynet/go-bert.cpp From 447d9f844bf546dd0dd54eb32998417ba54dd999 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 02:18:44 +0000 Subject: [PATCH 162/235] chore(deps): Bump aiohttp from 3.10.2 to 3.10.3 in /examples/langchain/langchainpy-localai-example (#3234) chore(deps): Bump aiohttp Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.10.2 to 3.10.3. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.10.2...v3.10.3) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index 493f2687..bf46bef4 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -1,4 +1,4 @@
-aiohttp==3.10.2
+aiohttp==3.10.3
 aiosignal==1.3.1
 async-timeout==4.0.3
 attrs==24.2.0

From 7d92936e1a181a92e5599a8b7dd21ecdf3a6f3b7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:59:16 +0000
Subject: [PATCH 163/235] chore(deps): Bump openai from 1.39.0 to 1.40.6 in /examples/langchain/langchainpy-localai-example (#3244)

chore(deps): Bump openai

Bumps [openai](https://github.com/openai/openai-python) from 1.39.0 to 1.40.6.
- [Release notes](https://github.com/openai/openai-python/releases)
- [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md)
- [Commits](https://github.com/openai/openai-python/compare/v1.39.0...v1.40.6)

---
updated-dependencies:
- dependency-name: openai
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 examples/langchain/langchainpy-localai-example/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/langchain/langchainpy-localai-example/requirements.txt b/examples/langchain/langchainpy-localai-example/requirements.txt
index bf46bef4..b9d161c5 100644
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -18,7 +18,7 @@ multidict==6.0.5
 mypy-extensions==1.0.0
 numexpr==2.10.1
 numpy==2.0.1
-openai==1.39.0
+openai==1.40.6
 openapi-schema-pydantic==1.2.4
 packaging>=23.2
 pydantic==2.8.2

From 02de274e00154269c4e9ccc653846f2cfdb77fcc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 13 Aug 2024 16:17:18 +0200
Subject: [PATCH 164/235] feat(federated): allow picking a specific worker, improve load balancing (#3243)

* feat(explorer): allow specifying a worker target

Signed-off-by: Ettore Di Giacinto

* feat(explorer): correctly load balance requests

Signed-off-by: Ettore Di Giacinto

* feat(explorer): mark load balanced by default

Signed-off-by: Ettore Di Giacinto

* fix: make sure to delete tunnels that might not exist anymore

If a worker goes off and on, it might change its tunnel address, and we
want to load balance only across the active tunnels.
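For illustration, a minimal sketch of how the reworked server can be driven
programmatically. The NewFederatedServer and Start signatures mirror this
patch; the concrete address, token, and worker ID are placeholder assumptions:

    package main

    import (
        "context"

        "github.com/mudler/LocalAI/core/p2p"
    )

    func main() {
        // Placeholder values: a real deployment reads these from the
        // LOCALAI_* environment variables or the CLI flags added here.
        const (
            listenAddr   = ":8080"
            p2pToken     = "changeme" // shared p2p network token
            networkID    = ""         // optional grouping for a set of instances
            targetWorker = ""         // non-empty pins requests to that worker ID
            randomWorker = false      // false keeps the least-used load balancing
        )

        fs := p2p.NewFederatedServer(
            listenAddr,
            p2p.NetworkID(networkID, p2p.FederatedID),
            p2pToken,
            !randomWorker, // loadBalanced is now the default
            targetWorker,
        )

        // Start blocks, proxying each incoming connection to a worker tunnel.
        if err := fs.Start(context.Background()); err != nil {
            panic(err)
        }
    }

When a target worker is set it takes precedence; otherwise the server picks
the least-used tunnel (or a random one when RandomWorker is requested), as
implemented in federated_server.go below.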
Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- core/cli/federated.go | 5 +-- core/p2p/federated.go | 68 ++++++++++++++++++++++++++++++++++-- core/p2p/federated_server.go | 53 +++++++++++++--------------- core/p2p/p2p.go | 4 ++- 4 files changed, 96 insertions(+), 34 deletions(-) diff --git a/core/cli/federated.go b/core/cli/federated.go index 271babca..b917812c 100644 --- a/core/cli/federated.go +++ b/core/cli/federated.go @@ -10,13 +10,14 @@ import ( type FederatedCLI struct { Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` - LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"` + RandomWorker bool `env:"LOCALAI_RANDOM_WORKER,RANDOM_WORKER" default:"false" help:"Select a random worker from the pool" group:"p2p"` Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances." group:"p2p"` + TargetWorker string `env:"LOCALAI_TARGET_WORKER,TARGET_WORKER" help:"Target worker to run the federated server on" group:"p2p"` } func (f *FederatedCLI) Run(ctx *cliContext.Context) error { - fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, f.LoadBalanced) + fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, !f.RandomWorker, f.TargetWorker) return fs.Start(context.Background()) } diff --git a/core/p2p/federated.go b/core/p2p/federated.go index 3ac3ff91..8e468ef6 100644 --- a/core/p2p/federated.go +++ b/core/p2p/federated.go @@ -1,6 +1,12 @@ package p2p -import "fmt" +import ( + "fmt" + "math/rand/v2" + "sync" + + "github.com/rs/zerolog/log" +) const FederatedID = "federated" @@ -12,22 +18,70 @@ func NetworkID(networkID, serviceID string) string { } type FederatedServer struct { + sync.Mutex listenAddr, service, p2ptoken string requestTable map[string]int loadBalanced bool + workerTarget string } -func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool) *FederatedServer { +func NewFederatedServer(listenAddr, service, p2pToken string, loadBalanced bool, workerTarget string) *FederatedServer { return &FederatedServer{ listenAddr: listenAddr, service: service, p2ptoken: p2pToken, requestTable: map[string]int{}, loadBalanced: loadBalanced, + workerTarget: workerTarget, + } +} + +func (fs *FederatedServer) RandomServer() string { + var tunnelAddresses []string + for _, v := range GetAvailableNodes(fs.service) { + if v.IsOnline() { + tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) + } else { + delete(fs.requestTable, v.TunnelAddress) // make sure it's not tracked + log.Info().Msgf("Node %s is offline", v.ID) + } + } + + if len(tunnelAddresses) == 0 { + return "" + } + + return tunnelAddresses[rand.IntN(len(tunnelAddresses))] +} + +func (fs *FederatedServer) syncTableStatus() { + fs.Lock() + defer fs.Unlock() + currentTunnels := make(map[string]struct{}) + + for _, v := range GetAvailableNodes(fs.service) { + if v.IsOnline() { + fs.ensureRecordExist(v.TunnelAddress) + currentTunnels[v.TunnelAddress] = struct{}{} + } + } + + // delete tunnels that don't exist anymore + for t := range fs.requestTable { + if _, ok := currentTunnels[t]; !ok { + 
delete(fs.requestTable, t) + } } } func (fs *FederatedServer) SelectLeastUsedServer() string { + fs.syncTableStatus() + + fs.Lock() + defer fs.Unlock() + + log.Debug().Any("request_table", fs.requestTable).Msgf("Current request table") + // cycle over requestTable and find the entry with the lower number // if there are multiple entries with the same number, select one randomly // if there are no entries, return an empty string @@ -39,18 +93,26 @@ func (fs *FederatedServer) SelectLeastUsedServer() string { minKey = k } } + log.Debug().Any("requests_served", min).Msgf("Selected tunnel %s", minKey) + return minKey } func (fs *FederatedServer) RecordRequest(nodeID string) { + fs.Lock() + defer fs.Unlock() // increment the counter for the nodeID in the requestTable fs.requestTable[nodeID]++ + + log.Debug().Any("request_table", fs.requestTable).Msgf("Current request table") } -func (fs *FederatedServer) EnsureRecordExist(nodeID string) { +func (fs *FederatedServer) ensureRecordExist(nodeID string) { // if the nodeID is not in the requestTable, add it with a counter of 0 _, ok := fs.requestTable[nodeID] if !ok { fs.requestTable[nodeID] = 0 } + + log.Debug().Any("request_table", fs.requestTable).Msgf("Current request table") } diff --git a/core/p2p/federated_server.go b/core/p2p/federated_server.go index 75da97ec..6d7ccd46 100644 --- a/core/p2p/federated_server.go +++ b/core/p2p/federated_server.go @@ -10,8 +10,6 @@ import ( "net" "time" - "math/rand/v2" - "github.com/mudler/edgevpn/pkg/node" "github.com/mudler/edgevpn/pkg/protocol" "github.com/mudler/edgevpn/pkg/types" @@ -76,7 +74,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error { case <-ctx.Done(): return errors.New("context canceled") default: - log.Debug().Msg("New for connection") + log.Debug().Msgf("New connection from %s", l.Addr().String()) // Listen for an incoming connection. 
conn, err := l.Accept() if err != nil { @@ -86,37 +84,33 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error { // Handle connections in a new goroutine, forwarding to the p2p service go func() { - var tunnelAddresses []string - for _, v := range GetAvailableNodes(fs.service) { - if v.IsOnline() { - tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) - } else { - log.Info().Msgf("Node %s is offline", v.ID) + tunnelAddr := "" + + if fs.workerTarget != "" { + for _, v := range GetAvailableNodes(fs.service) { + if v.ID == fs.workerTarget { + tunnelAddr = v.TunnelAddress + break + } } + } else if fs.loadBalanced { + log.Debug().Msgf("Load balancing request") + + tunnelAddr = fs.SelectLeastUsedServer() + if tunnelAddr == "" { + tunnelAddr = fs.RandomServer() + } + + } else { + tunnelAddr = fs.RandomServer() } - if len(tunnelAddresses) == 0 { + if tunnelAddr == "" { log.Error().Msg("No available nodes yet") return } - tunnelAddr := "" - - if fs.loadBalanced { - for _, t := range tunnelAddresses { - fs.EnsureRecordExist(t) - } - - tunnelAddr = fs.SelectLeastUsedServer() - log.Debug().Msgf("Selected tunnel %s", tunnelAddr) - if tunnelAddr == "" { - tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))] - } - - fs.RecordRequest(tunnelAddr) - } else { - tunnelAddr = tunnelAddresses[rand.IntN(len(tunnelAddresses))] - } + log.Debug().Msgf("Selected tunnel %s", tunnelAddr) tunnelConn, err := net.Dial("tcp", tunnelAddr) if err != nil { @@ -132,7 +126,10 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error { tunnelConn.Close() conn.Close() - // ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String()) + + if fs.loadBalanced { + fs.RecordRequest(tunnelAddr) + } }() } } diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go index bfa12287..af2106be 100644 --- a/core/p2p/p2p.go +++ b/core/p2p/p2p.go @@ -181,7 +181,6 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin if err != nil { return nil, fmt.Errorf("creating a new node: %w", err) } - // get new services, allocate and return to the channel // TODO: @@ -201,6 +200,9 @@ func discoveryTunnels(ctx context.Context, n *node.Node, token, servicesID strin zlog.Debug().Msg("Searching for workers") data := ledger.LastBlock().Storage[servicesID] + + zlog.Debug().Any("data", ledger.LastBlock().Storage).Msg("Ledger data") + for k, v := range data { zlog.Info().Msgf("Found worker %s", k) nd := &NodeData{} From 10324d9ad209f321c6d263770139473e73fe1994 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 13 Aug 2024 23:45:01 +0200 Subject: [PATCH 165/235] chore: :arrow_up: Update ggerganov/llama.cpp to `06943a69f678fb32829ff06d9c18367b17d4b361` (#3245) :arrow_up: Update ggerganov/llama.cpp Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c57a8cf2..eb507acd 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ DETECT_LIBS?=true # llama.cpp versions GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=fc4ca27b25464a11b3b86c9dbb5b6ed6065965c2 +CPPLLAMA_VERSION?=06943a69f678fb32829ff06d9c18367b17d4b361 # go-rwkv version RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp From 5bb2321fe0c1f99b44196aeb74473adf421b8a56 Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 23:47:52 +0000 Subject: [PATCH 166/235] chore(deps): Bump openai from 1.39.0 to 1.40.4 in /examples/functions (#3235) Bumps [openai](https://github.com/openai/openai-python) from 1.39.0 to 1.40.4. - [Release notes](https://github.com/openai/openai-python/releases) - [Changelog](https://github.com/openai/openai-python/blob/main/CHANGELOG.md) - [Commits](https://github.com/openai/openai-python/compare/v1.39.0...v1.40.4) --- updated-dependencies: - dependency-name: openai dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- examples/functions/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/functions/requirements.txt b/examples/functions/requirements.txt index a8a8ca8c..d24cb5ec 100644 --- a/examples/functions/requirements.txt +++ b/examples/functions/requirements.txt @@ -1,2 +1,2 @@ langchain==0.2.12 -openai==1.39.0 +openai==1.40.4 From 57f79002107be719c22399048e5d9188e8f749d6 Mon Sep 17 00:00:00 2001 From: Dave Date: Wed, 14 Aug 2024 03:06:41 -0400 Subject: [PATCH 167/235] feat: Initial Version of vscode DevContainer (#3217) initial version of devcontainer --------- Signed-off-by: Dave Lee --- .devcontainer/devcontainer.json | 23 ++++++ .devcontainer/docker-compose-devcontainer.yml | 45 ++++++++++++ .devcontainer/grafana/datasource.yml | 10 +++ .devcontainer/prometheus/prometheus.yml | 21 ++++++ .dockerignore | 1 + .env | 3 + .vscode/launch.json | 21 +++--- Dockerfile | 70 +++++++++++++++---- docker-compose.yaml | 2 - 9 files changed, 169 insertions(+), 27 deletions(-) create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose-devcontainer.yml create mode 100644 .devcontainer/grafana/datasource.yml create mode 100644 .devcontainer/prometheus/prometheus.yml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..a111dbfd --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json", + "name": "LocalAI", + "workspaceFolder": "/workspace", + "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ], + "service": "api", + "shutdownAction": "stopCompose", + "customizations": { + "vscode": { + "extensions": [ + "golang.go", + "ms-vscode.makefile-tools", + "ms-azuretools.vscode-docker", + "ms-python.python", + "ms-python.debugpy", + "wayou.vscode-todo-highlight", + "waderyan.gitblame" + ] + } + }, + "forwardPorts": [8080, 3000], + "postStartCommand": "make prepare && cp /build/backend-assets /workdir/backend-assets" +} \ No newline at end of file diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml new file mode 100644 index 00000000..e36492e9 --- /dev/null +++ b/.devcontainer/docker-compose-devcontainer.yml @@ -0,0 +1,45 @@ +services: + api: + build: + context: .. 
+ dockerfile: Dockerfile + target: devcontainer + args: + - FFMPEG=true + - IMAGE_TYPE=extras + - GO_TAGS=stablediffusion p2p tts + env_file: + - ../.env + ports: + - 8080:8080 + volumes: + - ..:/workspace:cached + command: /bin/sh -c "while sleep 1000; do :; done" + cap_add: + - SYS_PTRACE + security_opt: + - seccomp:unconfined + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - prom_data:/prometheus + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=grafana + volumes: + - ./grafana:/etc/grafana/provisioning/datasources +volumes: + prom_data: \ No newline at end of file diff --git a/.devcontainer/grafana/datasource.yml b/.devcontainer/grafana/datasource.yml new file mode 100644 index 00000000..1ed2fa3c --- /dev/null +++ b/.devcontainer/grafana/datasource.yml @@ -0,0 +1,10 @@ + +apiVersion: 1 + +datasources: +- name: Prometheus + type: prometheus + url: http://prometheus:9090 + isDefault: true + access: proxy + editable: true diff --git a/.devcontainer/prometheus/prometheus.yml b/.devcontainer/prometheus/prometheus.yml new file mode 100644 index 00000000..18c44da7 --- /dev/null +++ b/.devcontainer/prometheus/prometheus.yml @@ -0,0 +1,21 @@ +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: +- job_name: prometheus + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + static_configs: + - targets: + - localhost:9090 \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 3954769f..e91f0008 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ .idea .github .vscode +.devcontainer models examples/chatbot-ui/models examples/rwkv/models diff --git a/.env b/.env index 95a515bc..9e5dbd79 100644 --- a/.env +++ b/.env @@ -79,6 +79,9 @@ ### Enable to run parallel requests # LOCALAI_PARALLEL_REQUESTS=true +# Enable to allow p2p mode +# LOCALAI_P2P=true + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/.vscode/launch.json b/.vscode/launch.json index 2727da92..50493421 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -3,12 +3,12 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", "justMyCode": false, - "cwd": "${workspaceFolder}/examples/langchain-chroma", + "cwd": "${fileDirname}", "env": { "OPENAI_API_BASE": "http://localhost:8080/v1", "OPENAI_API_KEY": "abc" @@ -19,15 +19,16 @@ "type": "go", "request": "launch", "mode": "debug", - "program": "${workspaceFolder}/main.go", - "args": [ - "api" - ], + "program": "${workspaceRoot}", + "args": [], "env": { - "C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "LIBRARY_PATH": 
"${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "DEBUG": "true" - } + "LOCALAI_LOG_LEVEL": "debug", + "LOCALAI_P2P": "true", + "LOCALAI_FEDERATED": "true" + }, + "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceRoot}" } ] } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index a0feadd9..0dfaaa19 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core USER root -ARG GO_VERSION=1.22.5 +ARG GO_VERSION=1.22.6 ARG TARGETARCH ARG TARGETVARIANT @@ -30,7 +30,7 @@ RUN apt-get update && \ # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH $PATH:/root/go/bin:/usr/local/go/bin +ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ @@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates +RUN test -n "$TARGETARCH" \ + || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') + # Use the variables in subsequent instructions RUN echo "Target Architecture: $TARGETARCH" RUN echo "Target Variant: $TARGETVARIANT" # Cuda -ENV PATH /usr/local/cuda/bin:${PATH} +ENV PATH=/usr/local/cuda/bin:${PATH} # HipBLAS requirements -ENV PATH /opt/rocm/bin:${PATH} +ENV PATH=/opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion RUN apt-get update && \ @@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 WORKDIR /build -RUN test -n "$TARGETARCH" \ - || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') - ################################### ################################### @@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall ################################### ################################### -# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry. -# Adjustments to the build process should likely be made here. -FROM requirements-drivers AS builder +# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer + +FROM requirements-drivers AS builder-base ARG GO_TAGS="stablediffusion tts p2p" ARG GRPC_BACKENDS ARG MAKEFLAGS +ARG LD_FLAGS="-s -w" ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GO_TAGS=${GO_TAGS} @@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS} ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all +ENV LD_FLAGS=${LD_FLAGS} + +RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH" WORKDIR /build -COPY . . -COPY .git . -RUN echo "GO_TAGS: $GO_TAGS" - -RUN make prepare # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below # but that will also being in a newer version of absl which stablediffusion cannot compile with. 
This version of protoc is only @@ -256,6 +255,20 @@ RUN < Date: Wed, 14 Aug 2024 10:08:32 +0200 Subject: [PATCH 168/235] Update binaries.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/reference/binaries.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/content/docs/reference/binaries.md b/docs/content/docs/reference/binaries.md index edefca75..7780864c 100644 --- a/docs/content/docs/reference/binaries.md +++ b/docs/content/docs/reference/binaries.md @@ -19,4 +19,13 @@ Otherwise, here are the links to the binaries: | --- | --- | | Linux (amd64) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-x86_64) | | Linux (arm64) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-arm64) | -| MacOS (arm64) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Darwin-arm64) | \ No newline at end of file +| MacOS (arm64) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Darwin-arm64) | + + +{{% alert icon="⚡" context="warning" %}} +Binaries do have limited support compared to container images: + +- Python-based backends are not shipped with binaries (e.g. `bark`, `diffusers` or `transformers`) +- MacOS binaries and Linux-arm64 do not ship TTS nor `stablediffusion-cpp` backends +- Linux binaries do not ship `stablediffusion-cpp` backend +{{% /alert %}} From d6c4e751f23b4c6eb6d103490ee9fd4738e34667 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 14 Aug 2024 12:53:29 +0200 Subject: [PATCH 169/235] feat(explorer): visual improvements (#3247) Signed-off-by: Ettore Di Giacinto --- core/http/views/explorer.html | 67 +++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html index 91cb9720..033fa546 100644 --- a/core/http/views/explorer.html +++ b/core/http/views/explorer.html @@ -152,6 +152,35 @@ right: 10px; color: #e2e8f0; } + .fa-circle-nodes { + /* font-size: 100px; /* Adjust the size as needed */ + animation: rotateCircleNodes 8s linear infinite; /* Slow and fluid rotation */ + display: inline-block; + } + + @keyframes rotateCircleNodes { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } + } + /* Animation for the warning box */ + .fa-flask { + /* font-size: 100px; /* Adjust the size as needed */ + animation: shakeFlask 3s ease-in-out infinite; /* Smooth easing and longer duration for fluidity */ + transform-origin: bottom center; + } + + @keyframes shakeFlask { + 0%, 10% { transform: rotate(0deg); } /* Start and end still */ + 20% { transform: rotate(-10deg); } /* Smooth transition to left */ + 30% { transform: rotate(10deg); } /* Smooth transition to right */ + 40% { transform: rotate(-8deg); } /* Smooth transition to left */ + 50% { transform: rotate(8deg); } /* Smooth transition to right */ + 60% { transform: rotate(-5deg); } /* Smooth transition to left */ + 70% { transform: rotate(5deg); } /* Smooth transition to right */ + 80% { transform: rotate(-2deg); } /* Smooth transition to left */ + 90% { transform: rotate(2deg); } /* Smooth transition to right */ + 100% { transform: rotate(0deg); } /* Return to center */ + } @@ -159,14 +188,23 @@ {{template "views/partials/navbar_explorer" .}}
-        Network Clusters Explorer
-        View the clusters and workers available in each network.
+        Network Clusters Explorer
+        View the clusters and workers available in each network.
+        The explorer is a global, community-driven tool to share network tokens and view available clusters in the globe. Anyone can use the tokens to offload computation and use the clusters available or share resources. This is provided without any warranty. Use it at your own risk. We are not responsible for any potential harm or misuse. Sharing tokens globally allows anyone from the internet to use your instances.
@@ -221,23 +259,30 @@