models(gallery): add llama3 with enforced functioncall with grammars (#3027)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 06:58:09 +00:00 · 2024-07-27 12:48:00 +02:00
parent 0dd21f2b5e
commit fe0d092f58
2 changed files with 82 additions and 0 deletions
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -39,6 +39,24 @@
    - filename: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
      sha256: 3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab
      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+- !!merge <<: *llama31  # merge in the mapping anchored as `llama31` (anchor is defined outside this hunk)
+  name: "meta-llama-3.1-8b-instruct:grammar-functioncall"
+  url: "github:mudler/LocalAI/gallery/llama3.1-instruct-grammar.yaml@master"  # the gallery config file added by this same commit
+  urls:
+    - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
+    - https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF
+  description: |
+    This is the standard Llama 3.1 8B Instruct model with grammar and function call enabled.
+
+    When grammars are enabled in LocalAI, the LLM is forced to output valid tools constrained by BNF grammars. This can be useful for ensuring that the model outputs are valid and can be used in a production environment.
+    For more information on how to use grammars in LocalAI, see https://localai.io/features/openai-functions/#advanced and https://localai.io/features/constrained_grammars/.
+  overrides:
+    parameters:
+      model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf  # same name as the `filename` listed under `files` below
+  files:
+    - filename: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
+      sha256: c2f17f44af962660d1ad4cb1af91a731f219f3b326c2b14441f9df1f347f2815
+      uri: huggingface://MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
 - !!merge <<: *llama31
  name: "meta-llama-3.1-8b-claude-imat"
  urls:
--- a/gallery/llama3.1-instruct-grammar.yaml
+++ b/gallery/llama3.1-instruct-grammar.yaml
@@ -0,0 +1,64 @@
+---
+name: "llama3-instruct-grammar"  # NOTE(review): says "llama3" while this file is llama3.1-instruct-grammar.yaml - confirm the name is intentional
+# config_file is one literal block scalar holding an embedded YAML document: function/grammar settings, prompt templates, context_size, f16 and stopwords.
+config_file: |
+  mmap: true
+  function:
+    disable_no_action: true
+    grammar:
+      no_mixed_free_string: true
+      mixed_mode: true
+      schema_type: llama3.1 # or JSON is supported too (json)
+    response_regex:
+    - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
+  template:
+    chat_message: |
+      <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+      {{ if .FunctionCall -}}
+      Function call:
+      {{ else if eq .RoleName "tool" -}}
+      Function response:
+      {{ end -}}
+      {{ if .Content -}}
+      {{.Content -}}
+      {{ else if .FunctionCall -}}
+      {{ toJson .FunctionCall -}}
+      {{ end -}}
+      <|eot_id|>
+    function: |
+      <|start_header_id|>system<|end_header_id|>
+
+      You have access to the following functions:
+
+      {{range .Functions}}
+      Use the function '{{.Name}}' to '{{.Description}}'
+      {{toJson .Parameters}}
+      {{end}}
+
+      Think very carefully before calling functions.
+      If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
+
+      <function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
+
+      Reminder:
+      - If looking for real time information use relevant functions before falling back to searching on internet
+      - Function calls MUST follow the specified format, start with <function= and end with </function>
+      - Required parameters MUST be specified
+      - Only call one function at a time
+      - Put the entire function call reply on one line
+      <|eot_id|>
+      {{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    chat: |
+      <|begin_of_text|>{{.Input }}
+      <|start_header_id|>assistant<|end_header_id|>
+    completion: |
+      {{.Input}}
+  context_size: 8192
+  f16: true
+  stopwords:
+  - <|im_end|>
+  - <dummy32000>
+  - "<|eot_id|>"
+  - <|end_of_text|>