models(gallery): add new models to the gallery (#2124)
* models: add reranker and parler-tts-mini
* fix: chatml im_end should not have a newline
* models(noromaid): add
* models(llama3): add 70b, add dolphin2.9
* models(llama3): add unholy-8b
* models(llama3): add therapyllama3, aura

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Parent: b664edde29
Commit: 48d0aa2f6d
@@ -21,8 +21,7 @@ template:
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -37,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
@@ -21,8 +21,7 @@ template:
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -37,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
@@ -22,8 +22,7 @@ template:
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -38,8 +37,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
@@ -21,8 +21,7 @@ template:
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -37,8 +36,7 @@ template:
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
@@ -3,9 +3,6 @@ name: "hermes-2-pro-mistral"

config_file: |
mmap: true
parameters:
model: Hermes-2-Pro-Mistral-7B.Q6_K.gguf

template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
@@ -24,8 +21,7 @@ config_file: |
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
@@ -40,8 +36,7 @@ config_file: |
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call>
<|im_end|>
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
@@ -1,5 +1,35 @@
## LLM

### START parler-tts
- &parler-tts
url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
name: parler-tts-mini-v0.1
parameters:
model: parler-tts/parler_tts_mini_v0.1
license: apache-2.0
description: |
Parler-TTS is a lightweight text-to-speech (TTS) model that can generate high-quality, natural sounding speech in the style of a given speaker (gender, pitch, speaking style, etc). It is a reproduction of work from the paper Natural language guidance of high-fidelity text-to-speech with synthetic annotations by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively.
urls:
- https://github.com/huggingface/parler-tts
tags:
- tts
- gpu
- cpu
- text-to-speech
- python
### START rerankers
- &rerankers
url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
name: cross-encoder
parameters:
model: cross-encoder
license: apache-2.0
description: |
A cross-encoder model that can be used for reranking
tags:
- reranker
- gpu
- python
## LLMs
### START LLAMA3
- &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
@@ -20,20 +50,177 @@
Model Architecture Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
urls:
- https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
- https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF
tags:
- llm
- gguf
- gpu
- cpu
- llama3
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
files:
- filename: vicuna-7b-q5_k.gguf
sha256: cce3ba85525027d0fff520cad053d5a6f32c293382a40b3d55a650282c267787
uri: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
- filename: Meta-Llama-3-8B-Instruct.Q4_0.gguf
sha256: 19ded996fe6c60254dc7544d782276eff41046ed42aa5f2d0005dc457e5c0895
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_0.gguf
- <<: *llama3
name: "llama3-8b-instruct:Q6_K"
overrides:
parameters:
model: Meta-Llama-3-8B-Instruct.Q6_K.gguf
files:
- filename: Meta-Llama-3-8B-Instruct.Q6_K.gguf
sha256: b7bad45618e2a76cc1e89a0fbb93a2cac9bf410e27a619c8024ed6db53aa9b4a
uri: huggingface://QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q6_K.gguf
- <<: *llama3
name: "llama3-70b-instruct"
overrides:
parameters:
model: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
files:
- filename: Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
sha256: d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72
uri: huggingface://MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct.Q4_K_M.gguf
- <<: *llama3
name: "llama-3-unholy-8b"
urls:
- https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png
description: |
Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do.

Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3).

If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them.
overrides:
parameters:
model: Llama-3-Unholy-8B.q4_k_m.gguf
files:
- filename: Llama-3-Unholy-8B.q4_k_m.gguf
sha256: 17b7f716bce1b34d4aa99ee730a19a834f8c77ddb36090dde5a1eda963f93602
uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf
- <<: *llama3
name: "llama-3-unholy-8b:Q8_0"
urls:
- https://huggingface.co/Undi95/Llama-3-Unholy-8B-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/63ab1241ad514ca8d1430003/JmdBlOHlBHVmX1IbZzWSv.png
description: |
Use at your own risk, I'm not responsible for any usage of this model, don't try to do anything this model tell you to do.

Basic uncensoring, this model is epoch 3 out of 4 (but it seem enough at 3).

If you are censored, it's maybe because of keyword like "assistant", "Factual answer", or other "sweet words" like I call them.
overrides:
parameters:
model: Llama-3-Unholy-8B.q8_0.gguf
files:
- filename: Llama-3-Unholy-8B.q8_0.gguf
sha256: 8d4137018acdcd57df4beccc84d9ad3f7f08cac50588f76370afc16c85752702
uri: huggingface://Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q8_0.gguf
- <<: *llama3
name: "therapyllama-8b-v1"
urls:
- https://huggingface.co/victunes/TherapyLlama-8B-v1-GGUF
icon: https://cdn-uploads.huggingface.co/production/uploads/65f07d05279d2d8f725bf0c3/A-ckcZ9H0Ee1n_ls2FM41.png
description: |
Trained on Llama 3 8B using a modified version of jerryjalapeno/nart-100k-synthetic.

It is a Llama 3 version of https://huggingface.co/victunes/TherapyBeagle-11B-v2

TherapyLlama is hopefully aligned to be helpful, healthy, and comforting.
Usage:
Do not hold back on Buddy.
Open up to Buddy.
Pour your heart out to Buddy.
Engage with Buddy.
Remember that Buddy is just an AI.
Notes:

Tested with the Llama 3 Format
You might be assigned a random name if you don't give yourself one.
Chat format was pretty stale?

Disclaimer

TherapyLlama is NOT a real therapist. It is a friendly AI that mimics empathy and psychotherapy. It is an illusion without the slightest clue who you are as a person. As much as it can help you with self-discovery, A LLAMA IS NOT A SUBSTITUTE to a real professional.
overrides:
parameters:
model: TherapyLlama-8B-v1-Q4_K_M.gguf
files:
- filename: TherapyLlama-8B-v1-Q4_K_M.gguf
sha256: 3d5a16d458e074a7bc7e706a493d8e95e8a7b2cb16934c851aece0af9d1da14a
uri: huggingface://victunes/TherapyLlama-8B-v1-GGUF/TherapyLlama-8B-v1-Q4_K_M.gguf
- <<: *llama3
name: "aura-uncensored-l3-8b-iq-imatrix"
urls:
- https://huggingface.co/Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix
icon: https://cdn-uploads.huggingface.co/production/uploads/626dfb8786671a29c715f8a9/oiYHWIEHqmgUkY0GsVdDx.png
description: |
This is another better atempt at a less censored Llama-3 with hopefully more stable formatting.
overrides:
parameters:
model: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
files:
- filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2
uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf
- &dolphin
name: "dolphin-2.9-llama3-8b"
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
urls:
- https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf
tags:
- llm
- gguf
- gpu
- cpu
- llama3
license: llama3
description: |
Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
Dolphin is uncensored.
Curated and trained by Eric Hartford, Lucas Atkins, and Fernando Fernandes, and Cognitive Computations
icon: https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png
overrides:
parameters:
model: dolphin-2.9-llama3-8b-q4_K_M.gguf
files:
- filename: dolphin-2.9-llama3-8b-q4_K_M.gguf
sha256: be988199ce28458e97205b11ae9d9cf4e3d8e18ff4c784e75bfc12f54407f1a1
uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf
- <<: *dolphin
name: "dolphin-2.9-llama3-8b:Q6_K"
overrides:
parameters:
model: dolphin-2.9-llama3-8b-q6_K.gguf
files:
- filename: dolphin-2.9-llama3-8b-q6_K.gguf
sha256: 8aac72a0bd72c075ba7be1aa29945e47b07d39cd16be9a80933935f51b57fb32
uri: huggingface://cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q6_K.gguf
## LLama2 and derivatives

### Start noromaid
- &noromaid
url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
name: "noromaid-13b-0.4-DPO"
icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
license: cc-by-nc-4.0
urls:
- https://huggingface.co/NeverSleep/Noromaid-13B-0.4-DPO-GGUF
tags:
- llm
- llama2
- gguf
- gpu
- cpu
overrides:
parameters:
model: Noromaid-13B-0.4-DPO.q4_k_m.gguf
files:
- filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
### START LLaVa
- &llava
url: "github:mudler/LocalAI/gallery/llava.yaml@master"
@@ -50,6 +237,7 @@
- multimodal
- gguf
- gpu
- llama2
- cpu
name: "llava-1.6-vicuna"
overrides:
@@ -117,6 +305,7 @@
- llm
- gguf
- gpu
- llama2
- cpu
name: "phi-2-chat:Q8_0"
overrides:
@@ -149,6 +338,7 @@
tags:
- llm
- gguf
- llama2
- gpu
- cpu
name: "phi-2-orange"
@@ -175,6 +365,7 @@
- llm
- gguf
- gpu
- llama2
- cpu
overrides:
parameters:
@@ -217,6 +408,7 @@
- llm
- gguf
- gpu
- llama2
- cpu
overrides:
parameters:
@@ -262,6 +454,7 @@
- llm
- gguf
- gpu
- llama2
- cpu
overrides:
parameters:
@@ -281,6 +474,7 @@
- gpu
- cpu
- embeddings
- python
name: "all-MiniLM-L6-v2"
url: "github:mudler/LocalAI/gallery/sentencetransformers.yaml@master"
overrides:
@@ -302,6 +496,7 @@
tags:
- text-to-image
- stablediffusion
- python
- sd-1.5
- gpu
url: "github:mudler/LocalAI/gallery/dreamshaper.yaml@master"
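Note on the gallery entries above: every `files:` item pairs a download URI with a sha256 digest. As an illustrative aside (not LocalAI's own verification code), a downloaded GGUF can be checked against the published digest with nothing but the Go standard library; the file name and expected value below are copied from the llama3-70b-instruct entry.

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// fileSHA256 streams a file through SHA-256 and returns the hex digest,
// the same form used by the gallery's sha256 fields.
func fileSHA256(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return "", err
	}
	return hex.EncodeToString(h.Sum(nil)), nil
}

func main() {
	// Expected digest taken from the llama3-70b-instruct entry above.
	const want = "d559de8dd806a76dbd29f8d8bd04666f2b29e7c7872d8e8481abd07805884d72"
	got, err := fileSHA256("Meta-Llama-3-70B-Instruct.Q4_K_M.gguf")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if got == want {
		fmt.Println("checksum OK")
	} else {
		fmt.Println("checksum mismatch:", got)
	}
}
```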
gallery/noromaid.yaml (new file, 53 lines)
@@ -0,0 +1,53 @@
config_file: |
mmap: true
backend: llama-cpp
template:
chat_message: |
<|im_{{if eq .RoleName "assistant"}}bot{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}|>
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_system|>
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_bot|>
<tool_call>
chat: |
{{.Input -}}
<|im_bot|>
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"
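Aside on the template above: `toJson` is a helper LocalAI registers for its Go templates. The sketch below is not part of this commit; it renders a simplified excerpt of the noromaid chat_message template with text/template, using a stand-in `toJson` based on json.Marshal, to show how a function call becomes a <tool_call> block.

```go
package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// Stand-in for LocalAI's toJson template helper: serialize any value to JSON.
func toJson(v interface{}) string {
	b, _ := json.Marshal(v)
	return string(b)
}

// Simplified excerpt of the chat_message template above (role selection and
// Content branches dropped for brevity).
const chatMessage = `<|im_bot|>
{{- if .FunctionCall }}
<tool_call>
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- end }}<|im_end|>`

func main() {
	t := template.Must(template.New("noromaid").
		Funcs(template.FuncMap{"toJson": toJson}).
		Parse(chatMessage))
	data := map[string]interface{}{
		"FunctionCall": map[string]interface{}{
			"name":      "get_weather",
			"arguments": map[string]string{"city": "Rome"},
		},
	}
	_ = t.Execute(os.Stdout, data)
	// Renders roughly:
	// <|im_bot|>
	// <tool_call>
	// {"arguments":{"city":"Rome"},"name":"get_weather"}
	// </tool_call><|im_end|>
}
```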
gallery/parler-tts.yaml (new file, 2 lines)
@@ -0,0 +1,2 @@
config_file: |
backend: parler-tts
gallery/rerankers.yaml (new file, 2 lines)
@@ -0,0 +1,2 @@
config_file: |
backend: rerankers
@@ -24,8 +24,7 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}
<|im_end|>`
{{- end }}<|im_end|>`

const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>

@@ -107,7 +106,7 @@ var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]in
var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
"user": {
"template": chatML,
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>",
"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...<|im_end|>",
"data": model.ChatMessageTemplateData{
SystemPrompt: "",
Role: "user",
@@ -122,7 +121,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in
},
"assistant": {
"template": chatML,
"expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...\n<|im_end|>",
"expected": "<|im_start|>assistant\nA long time ago in a galaxy far, far away...<|im_end|>",
"data": model.ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
@@ -137,7 +136,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in
},
"function_call": {
"template": chatML,
"expected": "<|im_start|>assistant\n<tool_call>\n{\"function\":\"test\"}\n</tool_call>\n<|im_end|>",
"expected": "<|im_start|>assistant\n<tool_call>\n{\"function\":\"test\"}\n</tool_call><|im_end|>",
"data": model.ChatMessageTemplateData{
SystemPrompt: "",
Role: "assistant",
@@ -152,7 +151,7 @@ var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]in
},
"function_response": {
"template": chatML,
"expected": "<|im_start|>tool\n<tool_response>\nResponse from tool\n</tool_response>\n<|im_end|>",
"expected": "<|im_start|>tool\n<tool_response>\nResponse from tool\n</tool_response><|im_end|>",
"data": model.ChatMessageTemplateData{
SystemPrompt: "",
Role: "tool",
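The updated test expectations above capture the im_end fix: once `<|im_end|>` sits on the same line as `{{- end }}`, the rendered message no longer carries a newline before the end token. A minimal standalone sketch (not part of this commit; template strings simplified) of the difference with Go's text/template:

```go
package main

import (
	"os"
	"text/template"
)

// Simplified stand-ins for the old and new ChatML templates: the only
// difference is whether <|im_end|> sits on its own line after {{- end }}.
const oldTmpl = "<|im_start|>{{.Role}}\n{{if .Content}}{{.Content}}{{- end }}\n<|im_end|>"
const newTmpl = "<|im_start|>{{.Role}}\n{{if .Content}}{{.Content}}{{- end }}<|im_end|>"

func main() {
	data := map[string]string{
		"Role":    "user",
		"Content": "A long time ago in a galaxy far, far away...",
	}
	for _, src := range []string{oldTmpl, newTmpl} {
		t := template.Must(template.New("chatml").Parse(src))
		_ = t.Execute(os.Stdout, data)
		os.Stdout.WriteString("\n---\n")
	}
	// old renders: ...far, far away...\n<|im_end|>
	// new renders: ...far, far away...<|im_end|>
}
```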