feat(detection): detect by template in gguf file, add qwen2, phi, mistral and chatml (#2536)

feat(detection): detect by template in gguf file, add qwen and chatml

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-06-10 22:58:04 +02:00 committed by GitHub
parent aff2acacf9
commit 14b41be057
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 133 additions and 8 deletions

View File

@ -14,7 +14,8 @@ type UtilCMD struct {
}
type GGUFInfoCMD struct {
Args []string `arg:"" optional:"" name:"args" help:"Arguments to pass to the utility command"`
Args []string `arg:"" optional:"" name:"args" help:"Arguments to pass to the utility command"`
Header bool `optional:"" default:"false" name:"header" help:"Show header information"`
}
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
@ -35,5 +36,20 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
Any("modelName", f.Model().Name).
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])
log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
log.Info().Any("architecture", fmt.Sprintf("%+v", f.Architecture())).Msg("Architecture")
v, exists := f.Header.MetadataKV.Get("tokenizer.chat_template")
if exists {
log.Info().Msgf("chat_template: %s", v.ValueString())
}
if u.Header {
for _, metadata := range f.Header.MetadataKV {
log.Info().Msgf("%s: %+v", metadata.Key, metadata.Value)
}
// log.Info().Any("header", fmt.Sprintf("%+v", f.Header)).Msg("Header")
}
return nil
}

View File

@ -14,8 +14,10 @@ type familyType uint8
// Model families that identifyFamily can report for a GGUF file.
// Unknown is the zero value (iota starts at 0) and is what identifyFamily
// returns when no known family matches.
const (
Unknown familyType = iota
LLaMa3
LLama2
CommandR
Phi3
ChatML
Mistral03
)
type settingsConfig struct {
@ -23,6 +25,7 @@ type settingsConfig struct {
TemplateConfig TemplateConfig
}
// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
LLaMa3: {
StopWords: []string{"<|eot_id|>"},
@ -56,6 +59,63 @@ When using a tool, reply with JSON, for instance {"name": "tool_name", "argument
},
StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
},
Phi3: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input}}\n<|assistant|>",
ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
Completion: "{{.Input}}",
},
StopWords: []string{"<|end|>", "<|endoftext|>"},
},
ChatML: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input -}}\n<|im_start|>assistant",
Functions: `<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant`,
ChatMessage: `<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>`,
},
StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
},
Mistral03: {
TemplateConfig: TemplateConfig{
Chat: "{{.Input -}}",
Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
ChatMessage: `{{if eq .RoleName "user" -}}
[INST] {{.Content }} [/INST]
{{- else if .FunctionCall -}}
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
{{- else if eq .RoleName "tool" -}}
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
{{- else -}}
{{ .Content -}}
{{ end -}}`,
},
StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
},
}
// knownTemplates maps well-known chat templates used in HF (Hugging Face) to the model families defined above; lookups match on the exact template string
var knownTemplates = map[string]familyType{
`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
}
func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
@ -116,12 +176,38 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
}
func identifyFamily(f *gguf.GGUFFile) familyType {
switch {
case f.Architecture().Architecture == "llama" && f.Tokenizer().EOSTokenID == 128009:
return LLaMa3
case f.Architecture().Architecture == "command-r" && f.Tokenizer().EOSTokenID == 255001:
return CommandR
// identify from well known templates first
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
if found && chatTemplate.ValueString() != "" {
if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
return family
}
}
return Unknown
// otherwise try to identify from the model properties
arch := f.Architecture().Architecture
eosTokenID := f.Tokenizer().EOSTokenID
bosTokenID := f.Tokenizer().BOSTokenID
isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
// NOTE: Mistral 0.3 and Yi share the same bosTokenID and eosTokenID, so these token IDs alone cannot distinguish the two
llama3 := arch == "llama" && eosTokenID == 128009
commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
switch {
case llama3:
return LLaMa3
case commandR:
return CommandR
case phi3:
return Phi3
case qwen2, isYI:
return ChatML
default:
return Unknown
}
}

View File

@ -1,4 +1,27 @@
---
## Start QWEN2
- &qwen2
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "qwen2-7b-instruct"
license: apache-2.0
description: |
Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model.
urls:
- https://huggingface.co/Qwen/Qwen2-7B-Instruct
- https://huggingface.co/bartowski/Qwen2-7B-Instruct-GGUF
tags:
- llm
- gguf
- gpu
- qwen
- cpu
overrides:
parameters:
model: Qwen2-7B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2-7B-Instruct-Q4_K_M.gguf
sha256: 8d0d33f0d9110a04aad1711b1ca02dafc0fa658cd83028bdfa5eff89c294fe76
uri: huggingface://bartowski/Qwen2-7B-Instruct-GGUF/Qwen2-7B-Instruct-Q4_K_M.gguf
## START Mistral
- &mistral03
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"