Mirror of https://github.com/mudler/LocalAI.git, synced 2024-12-18 20:27:57 +00:00
models(gallery): add mistral-0.3 and command-r, update functions (#2388)

* models(gallery): add mistral-0.3 and command-r, update functions

  Also add disable_parallel_new_lines to disable newlines in the JSON output when forcing parallel tools. Some models (like Mistral) can be very sensitive to that when used for function calling.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* models(gallery): add aya-23-8b

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
  parent: eb11a46a73
  commit: ea330d452d
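Before the diffs: a minimal, self-contained sketch of how the new disable_parallel_new_lines flag travels from a model's YAML grammar config into the grammar generator. It uses trimmed-down copies of the types changed below; the real definitions live in LocalAI's functions package.

package main

import "fmt"

// Trimmed-down copies of the types touched by this commit (see the Go
// diffs below for the real ones).
type GrammarOption struct {
    MaybeArray              bool
    DisableParallelNewLines bool
}

var EnableMaybeArray = func(o *GrammarOption) { o.MaybeArray = true }
var DisableParallelNewLines = func(o *GrammarOption) { o.DisableParallelNewLines = true }

type GrammarConfig struct {
    ParallelCalls           bool `yaml:"parallel_calls"`
    DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
}

// Options mirrors GrammarConfig.Options() from the diff: each enabled
// YAML flag contributes one functional option.
func (g GrammarConfig) Options() []func(o *GrammarOption) {
    opts := []func(o *GrammarOption){}
    if g.ParallelCalls {
        opts = append(opts, EnableMaybeArray)
    }
    if g.DisableParallelNewLines {
        opts = append(opts, DisableParallelNewLines)
    }
    return opts
}

func main() {
    // Equivalent of `parallel_calls: true` plus
    // `disable_parallel_new_lines: true` in a model's grammar config.
    g := GrammarConfig{ParallelCalls: true, DisableParallelNewLines: true}
    o := &GrammarOption{}
    for _, apply := range g.Options() {
        apply(o)
    }
    fmt.Printf("%+v\n", o) // &{MaybeArray:true DisableParallelNewLines:true}
}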
.gitignore (vendored): 3 lines changed
@@ -6,6 +6,9 @@ get-sources
prepare-sources
/backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp
/backend/cpp/llama-*

*.log

go-ggml-transformers
go-gpt2
@@ -2,6 +2,7 @@ name: gpt-4
mmap: true
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192

stopwords:
- "<|im_end|>"
@@ -2,6 +2,7 @@ name: gpt-4
mmap: true
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192

stopwords:
- "<|im_end|>"
@@ -1,5 +1,7 @@
name: gpt-4
mmap: false
context_size: 8192

f16: false
parameters:
  model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
gallery/command-r.yaml (new file): 69 lines
@@ -0,0 +1,69 @@
---
name: "command-r"

config_file: |
  context_size: 131072
  stopwords:
  - "<|END_OF_TURN_TOKEN|>"

  function:
    # disable injecting the "answer" tool
    disable_no_action: true

    grammar:
      # This allows the grammar to also return messages
      mixed_mode: true
      # Not all models have a sketchpad or something to write thoughts on.
      # This one will either reply with strings or JSON, but not both in the same reply
      #no_mixed_free_string: true
      # Disable grammar
      # Base instructor model doesn't work well with grammars
      #disable: true
      disable_parallel_new_lines: true
    return_name_in_function_response: true
    replace_function_results:
      # Replace everything that is not a JSON array or object
      - key: '(?s)^[^{\[]*'
        value: ""
      - key: '(?s)[^}\]]*$'
        value: ""
      # Convert single quotes to double quotes
      - key: "'([^']*?)'"
        value: "_DQUOTE_${1}_DQUOTE_"
      - key: '\\"'
        value: "__TEMP_QUOTE__"
      - key: "\'"
        value: "'"
      - key: "_DQUOTE_"
        value: '"'
      - key: "__TEMP_QUOTE__"
        value: '"'

  template:
    join_chat_messages_by_character: "" ## No newlines between messages
    chat: |-
      {{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
    chat_message: |-
      {{if eq .RoleName "user" -}}
      <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
      {{- else if eq .RoleName "system" -}}
      <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
      {{- else if eq .RoleName "assistant" -}}
      <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
      {{- else if eq .RoleName "tool" -}}
      <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
      {{- else if .FunctionCall -}}
      <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}<|END_OF_TURN_TOKEN|>
      {{- end -}}
    completion: |
      {{.Input}}
    function: |-
      <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
      You are a function calling AI model, you can call the following functions:
      ## Available Tools
      {{range .Functions}}
      - {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
      {{end}}
      When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
      <|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}
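The replace_function_results chain above normalizes the raw model output before JSON parsing: strip any prose around the JSON payload, then convert single-quoted strings to double-quoted ones without clobbering quotes that were already escaped. A standalone sketch of the same pipeline, assuming the rules are applied in order with Go's regexp package (the sample input is invented):

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // The replace_function_results pairs from the YAML above, in order.
    rules := []struct{ key, value string }{
        {`(?s)^[^{\[]*`, ""},                   // strip any prefix before the JSON
        {`(?s)[^}\]]*$`, ""},                   // strip any suffix after the JSON
        {`'([^']*?)'`, "_DQUOTE_${1}_DQUOTE_"}, // protect single-quoted strings
        {`\\"`, "__TEMP_QUOTE__"},              // park pre-escaped double quotes
        {`'`, "'"},                             // leave remaining apostrophes alone
        {`_DQUOTE_`, `"`},                      // promote protected quotes
        {`__TEMP_QUOTE__`, `"`},                // restore parked quotes
    }
    out := `Calling a tool: {'name': 'search', 'arguments': {'q': 'LocalAI'}} done`
    for _, r := range rules {
        out = regexp.MustCompile(r.key).ReplaceAllString(out, r.value)
    }
    fmt.Println(out) // {"name": "search", "arguments": {"q": "LocalAI"}}
}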
@@ -3,6 +3,7 @@ name: "hermes-2-pro-mistral"

config_file: |
  mmap: true
  context_size: 8192
  stopwords:
  - "<|im_end|>"
  - "<dummy32000>"
@@ -1,4 +1,35 @@
---
## START Mistral
- &mistral03
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
  name: "mistral-7b-instruct-v0.3"
  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
  license: apache-2.0
  description: |
    The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-7B-v0.3.

    Mistral-7B-v0.3 has the following changes compared to Mistral-7B-v0.2:

    Extended vocabulary to 32768
    Supports v3 Tokenizer
    Supports function calling
  urls:
  - https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
  - https://huggingface.co/MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF
  tags:
  - llm
  - gguf
  - gpu
  - mistral
  - cpu
  - function-calling
  overrides:
    parameters:
      model: Mistral-7B-Instruct-v0.3.Q4_K_M.gguf
  files:
  - filename: "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"
    sha256: "14850c84ff9f06e9b51d505d64815d5cc0cea0257380353ac0b3d21b21f6e024"
    uri: "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"
### START mudler's LocalAI specific-models
- &mudler
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -1134,6 +1165,46 @@
  - filename: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf
    sha256: 83647caf4a23a91697585cff391e7d1236fac867392f9e49a6dab59f81b5f810
    uri: huggingface://bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf
### START Command-r
- &command-R
  url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
  name: "command-r-v01:q1_s"
  license: "cc-by-nc-4.0"
  icon: https://cdn.sanity.io/images/rjtqmwfu/production/ae020d94b599cc453cc09ebc80be06d35d953c23-102x18.svg
  urls:
  - https://huggingface.co/CohereForAI/c4ai-command-r-v01
  - https://huggingface.co/dranger003/c4ai-command-r-v01-iMat.GGUF
  description: |
    C4AI Command-R is a research release of a highly performant 35-billion-parameter generative model. Command-R is a large language model with open weights, optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R supports multilingual generation, evaluated in 10 languages, and has highly performant RAG capabilities.
  tags:
  - llm
  - gguf
  - gpu
  - command-r
  - cpu
  overrides:
    parameters:
      model: ggml-c4ai-command-r-v01-iq1_s.gguf
  files:
  - filename: "ggml-c4ai-command-r-v01-iq1_s.gguf"
    sha256: "aad4594ee45402fe344d8825937d63b9fa1f00becc6d1cc912b016dbb020e0f0"
    uri: "huggingface://dranger003/c4ai-command-r-v01-iMat.GGUF/ggml-c4ai-command-r-v01-iq1_s.gguf"
- !!merge <<: *command-R
  name: "aya-23-8b"
  urls:
  - https://huggingface.co/CohereForAI/aya-23-8B
  - https://huggingface.co/bartowski/aya-23-8B-GGUF
  description: |
    Aya 23 is an open-weights research release of an instruction fine-tuned model with highly advanced multilingual capabilities. Aya 23 pairs a highly performant pre-trained model from the Command family with the recently released Aya Collection. The result is a powerful multilingual large language model serving 23 languages.

    This model card corresponds to the 8-billion-parameter version of Aya 23; a 35-billion-parameter version was also released.
  overrides:
    parameters:
      model: aya-23-8B-Q4_K_M.gguf
  files:
  - filename: "aya-23-8B-Q4_K_M.gguf"
    sha256: "21b3aa3abf067f78f6fe08deb80660cc4ee8ad7b4ab873a98d87761f9f858b0f"
    uri: "huggingface://bartowski/aya-23-8B-GGUF/aya-23-8B-Q4_K_M.gguf"
- &phi-2-chat
  ### START Phi-2
  url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
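Once these entries are in the gallery index, the models can be installed by id. A hedged sketch of the install call in Go, assuming a LocalAI instance on localhost:8080 and its /models/apply gallery endpoint; depending on the setup, the id may need a gallery prefix such as localai@:

package main

import (
    "bytes"
    "fmt"
    "net/http"
)

func main() {
    // Ask the running LocalAI instance to pull one of the new gallery
    // models. Endpoint and payload follow LocalAI's model gallery API
    // as we understand it; the host is an assumption.
    body := bytes.NewBufferString(`{"id": "mistral-7b-instruct-v0.3"}`)
    resp, err := http.Post("http://localhost:8080/models/apply", "application/json", body)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status) // installation proceeds asynchronously
}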
gallery/mistral-0.3.yaml (new file): 67 lines
@@ -0,0 +1,67 @@
---
name: "mistral-0.3"

config_file: |
  context_size: 8192
  mmap: true
  stopwords:
  - "<|im_end|>"
  - "<dummy32000>"
  - "</tool_call>"
  - "<|eot_id|>"
  - "<|end_of_text|>"
  - "</s>"
  - "[/TOOL_CALLS]"
  - "[/ACTIONS]"

  function:
    # disable injecting the "answer" tool
    disable_no_action: true

    grammar:
      # This allows the grammar to also return messages
      #mixed_mode: true
      # Not all models have a sketchpad or something to write thoughts on.
      # This one will either reply with strings or JSON, but not both in the same reply
      #no_mixed_free_string: true
      # Disable grammar
      # Base instructor model doesn't work well with grammars
      disable: true
      parallel_calls: true
      disable_parallel_new_lines: true

    return_name_in_function_response: true
    # Without grammar uncomment the lines below
    # Warning: this is relying only on the capability of the
    # LLM model to generate the correct function call.
    json_regex_match:
    - "(?s)\\[TOOL\\_CALLS\\](.*)"
    replace_function_results:
      # Replace everything that is not a JSON array or object
      - key: '(?s)^[^{\[]*'
        value: ""
      - key: '(?s)[^}\]]*$'
        value: ""
      - key: "(?s)\\[TOOL\\_CALLS\\]"
        value: ""
      - key: "(?s)\\[\\/TOOL\\_CALLS\\]"
        value: ""

  template:
    join_chat_messages_by_character: "" ## No newlines between messages
    chat: |
      {{.Input -}}
    chat_message: |-
      {{if eq .RoleName "user" -}}
      [INST] {{.Content }} [/INST]
      {{- else if .FunctionCall -}}
      [TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
      {{- else if eq .RoleName "tool" -}}
      [TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
      {{- else -}}
      {{ .Content -}}
      {{ end -}}
    completion: |
      {{.Input}}
    function: |-
      [AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}
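With the grammar disabled, mistral-0.3 relies on json_regex_match to pull the tool-call payload out of the raw completion. A standalone sketch mirroring the FindStringSubmatch loop in ParseFunctionCall further down; the sample completion is invented:

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Same pattern as json_regex_match above; capture group 1 becomes
    // the JSON payload, exactly as ParseFunctionCall does below.
    respRegex := regexp.MustCompile(`(?s)\[TOOL\_CALLS\](.*)`)
    llmresult := `[TOOL_CALLS] [{"name": "get_weather", "arguments": {"city": "Rome"}}]`
    if match := respRegex.FindStringSubmatch(llmresult); len(match) >= 2 {
        llmresult = match[1]
    }
    // Prints the captured JSON array (with its leading space), ready for
    // the replace_function_results cleanup.
    fmt.Println(llmresult)
}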
@@ -50,6 +50,9 @@ var (
        [^"\\] |
        "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
    )* "\"" space`,
    // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
    // however, if we don't have it, the grammar will be ambiguous and
    // empirically results are way worse.
    "freestring": `(
        [^"\\] |
        "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
@@ -111,12 +114,18 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string {
    return key
}

const array = `arr ::=
const arrayNewLines = `arr ::=
  "[\n" (
    realvalue
    (",\n" realvalue)*
  )? "]"`

const array = `arr ::=
  "[" (
    realvalue
    ("," realvalue)*
  )? "]"`

func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {

    grammarOpts := &GrammarOption{}

@@ -124,6 +133,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))

    suffix := grammarOpts.Suffix
    maybeArray := grammarOpts.MaybeArray
    disableParallelNewLines := grammarOpts.DisableParallelNewLines
    maybeString := grammarOpts.MaybeString
    noMixedFreeString := grammarOpts.NoMixedFreeString

@@ -177,7 +187,11 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
    }

    lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
    if disableParallelNewLines {
        lines = append(lines, array)
    } else {
        lines = append(lines, arrayNewLines)
    }

    if maybeArray {
        lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
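The only difference between the two arr rules is the separator the model is forced to emit between parallel tool calls. An illustration of the two output shapes (hand-written examples, not produced by running the grammar):

package main

import "fmt"

func main() {
    // Shape admitted by the default arrayNewLines rule: one call per line.
    withNewlines := "[\n" +
        `{"name": "a", "arguments": {}}` + ",\n" +
        `{"name": "b", "arguments": {}}` + "]"
    // Shape admitted by the new array rule (disable_parallel_new_lines):
    // the whole parallel-call array stays on a single line, which some
    // models (per the commit message, Mistral) handle more reliably.
    compact := `[{"name": "a", "arguments": {}},{"name": "b", "arguments": {}}]`
    fmt.Println(withNewlines)
    fmt.Println(compact)
}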
@@ -427,5 +427,22 @@ var _ = Describe("JSON schema grammar tests", func() {
            }
            Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
        })

        It("generates parallel tools without newlines in JSON", func() {
            structuredGrammar := JSONFunctionStructureName{
                OneOf: testFunctionsName}
            content := `arr ::=
  "[" (
    realvalue
    ("," realvalue)*
  )? "]"`
            grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
            results := strings.Split(content, "\n")
            for _, r := range results {
                if r != "" {
                    Expect(grammar).To(ContainSubstring(r))
                }
            }
        })
    })
})
@@ -4,6 +4,7 @@ type GrammarOption struct {
    PropOrder               string
    Suffix                  string
    MaybeArray              bool
    DisableParallelNewLines bool
    MaybeString             bool
    NoMixedFreeString       bool
}

@@ -18,6 +19,10 @@ var EnableMaybeArray = func(o *GrammarOption) {
    o.MaybeArray = true
}

var DisableParallelNewLines = func(o *GrammarOption) {
    o.DisableParallelNewLines = true
}

var EnableMaybeString = func(o *GrammarOption) {
    o.MaybeString = true
}
@@ -12,6 +12,8 @@ type GrammarConfig struct {
    // ParallelCalls enables the LLM to return multiple function calls in the same response
    ParallelCalls bool `yaml:"parallel_calls"`

    DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`

    // MixedMode enables the LLM to return strings and not only JSON objects
    // This is useful so models are not constrained to returning only JSON and can also send messages back to the user
    MixedMode bool `yaml:"mixed_mode"`

@@ -81,6 +83,9 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) {
    if g.ParallelCalls {
        opts = append(opts, EnableMaybeArray)
    }
    if g.DisableParallelNewLines {
        opts = append(opts, DisableParallelNewLines)
    }
    if g.Prefix != "" {
        opts = append(opts, SetPrefix(g.Prefix))
    }

@@ -134,7 +139,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
    var singleObj map[string]interface{}
    err = json.Unmarshal([]byte(s), &singleObj)
    if err != nil {
        log.Warn().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result")
        log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects")
    } else {
        ss = []map[string]interface{}{singleObj}
    }

@@ -177,6 +182,7 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
        match := respRegex.FindStringSubmatch(llmresult)
        if len(match) >= 2 {
            llmresult = match[1]
            log.Debug().Msgf("LLM result(JSONRegexMatch): %s", llmresult)
            break
        }
    }
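The fallback path touched above tries to unmarshal the cleaned LLM result as an array of objects first, then as a single object. A minimal standalone sketch of that logic (the sample string and plain fmt logging are ours):

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // A single tool-call object, as a model without parallel calls
    // would emit it.
    s := `{"name": "get_weather", "arguments": {"city": "Rome"}}`
    var ss []map[string]interface{}
    if err := json.Unmarshal([]byte(s), &ss); err != nil {
        // Not an array: retry as a single object and wrap it.
        var singleObj map[string]interface{}
        if err := json.Unmarshal([]byte(s), &singleObj); err != nil {
            fmt.Println("unable to unmarshal llm result in a single object or an array of JSON objects:", err)
            return
        }
        ss = []map[string]interface{}{singleObj}
    }
    fmt.Println(ss) // [map[arguments:map[city:Rome] name:get_weather]]
}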