models(gallery): add hermes-2-theta-llama-3-70b (#2626)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
commit 964732590d
parent 70a2bfe82e
@@ -7,47 +7,26 @@ config_file: |
     stopwords:
     - "<|im_end|>"
     - "<dummy32000>"
-    - "</tool_call>"
     - "<|eot_id|>"
     - "<|end_of_text|>"
     function:
-      # disable injecting the "answer" tool
       disable_no_action: true
-
-      grammar:
-        # This allows the grammar to also return messages
-        mixed_mode: true
-
       return_name_in_function_response: true
-      # Without grammar uncomment the lines below
-      # Warning: this is relying only on the capability of the
-      # LLM model to generate the correct function call.
+      grammar:
+        # Uncomment the line below to enable grammar matching for JSON results if the model is breaking
+        # the output. This will make the model more accurate and won't break the JSON output.
+        # This however, will make parallel_calls not functional (it is a known bug)
+        # mixed_mode: true
+        parallel_calls: true
+        expect_strings_after_json: true
       json_regex_match:
       - "(?s)<tool_call>(.*?)</tool_call>"
-      - "(?s)<tool_call>(.*?)"
+      - "(?s)<tool_call>(.*)"
+      capture_llm_results:
+      - (?s)<scratchpad>(.*?)</scratchpad>
       replace_llm_results:
-      # Drop the scratchpad content from responses
-      - key: "(?s)<scratchpad>.*</scratchpad>"
-        value: ""
-      replace_function_results:
-      # Replace everything that is not JSON array or object
-      - key: '(?s)^[^{\[]*'
-        value: ""
-      - key: '(?s)[^}\]]*$'
-        value: ""
-      - key: "'([^']*?)'"
-        value: "_DQUOTE_${1}_DQUOTE_"
-      - key: '\\"'
-        value: "__TEMP_QUOTE__"
-      - key: "\'"
-        value: "'"
-      - key: "_DQUOTE_"
-        value: '"'
-      - key: "__TEMP_QUOTE__"
-        value: '"'
-      # Drop the scratchpad content from responses
-      - key: "(?s)<scratchpad>.*</scratchpad>"
-        value: ""
+      - key: (?s)<scratchpad>(.*?)</scratchpad>
+        value: ""

     template:
       chat: |
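The hunk above is the substance of the config change: the old replace_function_results quote-rewriting rules are dropped in favour of parallel_calls and expect_strings_after_json, a capture_llm_results / replace_llm_results pair now handles the <scratchpad> block, and json_regex_match keeps extracting the JSON payload from <tool_call> tags. The Python sketch below only illustrates how those regexes are meant to behave; the sample model output is invented for illustration, and LocalAI applies these patterns in its own Go function-calling code, not via anything like this script.

# Minimal sketch (assumed semantics, not LocalAI's actual implementation)
# of how the regex settings above are meant to process a model response.
import re

# Hypothetical Hermes-style output, invented for illustration.
raw = (
    "<scratchpad>The user wants the weather, so call get_weather.</scratchpad>\n"
    '<tool_call>{"name": "get_weather", "arguments": {"city": "Rome"}}</tool_call>'
)

# json_regex_match: extract the JSON payload of the tool call,
# trying the stricter pattern first.
json_regex_match = [r"(?s)<tool_call>(.*?)</tool_call>", r"(?s)<tool_call>(.*)"]
tool_json = None
for pattern in json_regex_match:
    match = re.search(pattern, raw)
    if match:
        tool_json = match.group(1).strip()
        break

# capture_llm_results: keep the scratchpad text as a side channel.
captured = re.findall(r"(?s)<scratchpad>(.*?)</scratchpad>", raw)

# replace_llm_results: drop the scratchpad block from the text shown to the user.
cleaned = re.sub(r"(?s)<scratchpad>(.*?)</scratchpad>", "", raw).strip()

print(tool_json)  # {"name": "get_weather", "arguments": {"city": "Rome"}}
print(captured)   # ['The user wants the weather, so call get_weather.']
print(cleaned)    # <tool_call>{"name": ...}</tool_call>

The second json_regex_match pattern has no closing tag, so it presumably acts as a fallback when the model stops before emitting </tool_call>.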
@@ -73,7 +52,7 @@ config_file: |
         {{- end }}<|im_end|>
       completion: |
         {{.Input}}
-      function: |-
+      function: |
         <|im_start|>system
         You are a function calling AI model.
         Here are the available tools:
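The only functional change in this hunk is the block-scalar indicator on the function template: |- (strip chomping) removes the trailing newline from the rendered system prompt, while | (clip) keeps exactly one, so whatever the template appends next starts on its own line. A quick PyYAML check of that difference, purely illustrative and not part of LocalAI:

# Illustrative check of the "|-" -> "|" change above: YAML block-scalar chomping.
# "|-" strips the trailing newline, "|" keeps exactly one.
import yaml  # PyYAML

doc_strip = 'function: |-\n  <|im_start|>system\n  You are a function calling AI model.\n'
doc_clip = 'function: |\n  <|im_start|>system\n  You are a function calling AI model.\n'

print(repr(yaml.safe_load(doc_strip)["function"]))
# '<|im_start|>system\nYou are a function calling AI model.'
print(repr(yaml.safe_load(doc_clip)["function"]))
# '<|im_start|>system\nYou are a function calling AI model.\n'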
@@ -2325,6 +2325,29 @@
   - filename: "Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf"
     sha256: "762b9371a296ab2628592b9462dc676b27d881a3402816492801641a437669b3"
     uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf"
+- !!merge <<: *hermes-2-pro-mistral
+  name: "hermes-2-theta-llama-3-70b"
+  icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/P4NxBFwfBbboNZVytpn45.png
+  tags:
+  - llm
+  - gguf
+  - gpu
+  - llama3
+  - cpu
+  - function-calling
+  description: |
+    Hermes-2 Θ (Theta) 70B is the continuation of our experimental merged model released by Nous Research, in collaboration with Charles Goddard and Arcee AI, the team behind MergeKit.
+
+    Hermes-2 Θ is a merged and then further RLHF'ed version of our excellent Hermes 2 Pro model and Meta's Llama-3 Instruct model to form a new model, Hermes-2 Θ, combining the best of both worlds of each model.
+  urls:
+  - https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF
+  overrides:
+    parameters:
+      model: Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf
+  files:
+  - filename: "Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf"
+    sha256: " b3965f671c35d09da8b903218f5bbaac94efdd9000e4fe4a2bac87fcac9f664e"
+    uri: "huggingface://NousResearch/Hermes-2-Theta-Llama-3-70B-GGUF/Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf"
 ### LLAMA3 version
 - !!merge <<: *hermes-2-pro-mistral
   name: "hermes-2-pro-llama-3-8b"
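The new entry leans on YAML merge keys: !!merge <<: *hermes-2-pro-mistral pulls in everything defined on the hermes-2-pro-mistral anchor (including the function-calling config edited in the first hunk) and then sets its own name, icon, tags, description, URLs, and the 70B GGUF file via overrides.parameters.model and files. The sketch below shows how such a merge resolves with PyYAML; the anchor contents here are stand-ins, since the real anchor lives elsewhere in index.yaml and defines far more fields.

# Sketch of the "!!merge <<: *anchor" pattern used by the entry above.
# The anchor fields below (license, tags) are stand-ins for illustration.
import yaml  # PyYAML

doc = """
- &hermes-2-pro-mistral
  name: "hermes-2-pro-mistral"
  license: apache-2.0
  tags:
  - llm
- !!merge <<: *hermes-2-pro-mistral
  name: "hermes-2-theta-llama-3-70b"
  overrides:
    parameters:
      model: Hermes-2-Theta-Llama-3-70B-Q4_K_M.gguf
"""

entries = yaml.safe_load(doc)
theta = entries[1]
print(theta["name"])     # hermes-2-theta-llama-3-70b (overridden by the entry)
print(theta["license"])  # apache-2.0 (inherited from the anchor)
print(theta["tags"])     # ['llm'] (inherited from the anchor)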