models(gallery): add hermes-2-theta-llama-3-70b (#2626)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-27 20:44:21 +00:00 · 2024-06-21 19:41:49 +02:00 · 2024-06-21 19:41:49 +02:00 · 964732590d
commit 964732590d
parent 70a2bfe82e
2 changed files with 36 additions and 34 deletions
--- a/gallery/hermes-2-pro-mistral.yaml
+++ b/gallery/hermes-2-pro-mistral.yaml
@ -7,47 +7,26 @@ config_file: |
  stopwords:
  - "<|im_end|>"
  - "<dummy32000>"
-  - "</tool_call>"
  - "<|eot_id|>"
  - "<|end_of_text|>"
  function:
-    # disable injecting the "answer" tool
    disable_no_action: true
-
-    grammar:
-      # This allows the grammar to also return messages
-      mixed_mode: true
-
    return_name_in_function_response: true
-    # Without grammar uncomment the lines below
-    # Warning: this is relying only on the capability of the
-    # LLM model to generate the correct function call.
+    grammar:
+      # Uncomment the line below to enable grammar matching for JSON results if the model is breaking
+      # the output. This will make the model more accurate and won't break the JSON output.
+      # This however, will make parallel_calls not functional (it is a known bug)
+      # mixed_mode: true
+      parallel_calls: true
+      expect_strings_after_json: true
    json_regex_match:
    - "(?s)<tool_call>(.*?)</tool_call>"
-    - "(?s)<tool_call>(.*?)"
+    - "(?s)<tool_call>(.*)"
+    capture_llm_results:
+      - (?s)<scratchpad>(.*?)</scratchpad>
    replace_llm_results:
-    # Drop the scratchpad content from responses
-    - key: "(?s)<scratchpad>.*</scratchpad>"
-      value: ""
-    replace_function_results:
-    # Replace everything that is not JSON array or object
-    - key: '(?s)^[^{\[]*'
-      value: ""
-    - key: '(?s)[^}\]]*$'
-      value: ""
-    - key: "'([^']*?)'"
-      value: "_DQUOTE_${1}_DQUOTE_"
-    - key: '\\"'
-      value: "__TEMP_QUOTE__"
-    - key: "\'"
-      value: "'"
-    - key: "_DQUOTE_"
-      value: '"'
-    - key: "__TEMP_QUOTE__"
-      value: '"'
-    # Drop the scratchpad content from responses
-    - key: "(?s)<scratchpad>.*</scratchpad>"
-      value: ""
+      - key: (?s)<scratchpad>(.*?)</scratchpad>
+        value: ""

  template:
    chat: |
@ -73,7 +52,7 @@ config_file: |
      {{- end }}<|im_end|>
    completion: |
      {{.Input}}
-    function: |-
+    function: |
      <|im_start|>system
      You are a function calling AI model.
      Here are the available tools:
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@ -2325,6 +2325,29 @@
    - filename: "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf"
      sha256: "762b9371a296ab2628592b9462dc676b27d881a3402816492801641a437669b3"
      uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf"
+- !!merge <<: *hermes-2-pro-mistral
+  name: "hermes-2-theta-llama-3-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/P4NxBFwfBbboNZVytpn45.png
+  tags:
+    - llm
+    - gguf
+    - gpu
+    - llama3
+    - cpu
+    - function-calling
+  description: |
+    Hermes-2 Θ (Theta) 70B is the continuation of our experimental merged model released by Nous Research, in collaboration with Charles Goddard and Arcee AI, the team behind MergeKit.
+
+    Hermes-2 Θ is a merged and then further RLHF'ed version our excellent Hermes 2 Pro model and Meta's Llama-3 Instruct model to form a new model, Hermes-2 Θ, combining the best of both worlds of each model.
+  urls:
+    - https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF
+  overrides:
+    parameters:
+      model: Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf
+  files:
+    - filename: "Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf"
+      sha256: "b3965f671c35d09da8b903218f5bbaac94efdd9000e4fe4a2bac87fcac9f664e"  # bare 64-char hex digest; no surrounding whitespace
+      uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF/Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf"
 ### LLAMA3 version
 - !!merge <<: *hermes-2-pro-mistral
  name: "hermes-2-pro-llama-3-8b"