fix(grammar): respect JSONmode and grammar from user input (#1935)

* fix(grammar): Fix JSON mode and custom grammar

* tests(aio): add jsonmode test

* tests(aio): add functioncall test

* fix(aio): use hermes-2-pro-mistral as llm for CPU profile

* add phi-2-orange
This commit is contained in:
Ettore Di Giacinto 2024-03-31 13:04:09 +02:00 committed by GitHub
parent 784657a652
commit 35290e146b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 139 additions and 5 deletions

View File

@ -1,25 +1,48 @@
name: gpt-4 name: gpt-4
mmap: true mmap: true
parameters: parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
template: template:
chat_message: | chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{ if .FunctionCall }}<tool_call>{{end}}
{{ if eq .RoleName "tool" }}<tool_result>{{end}}
{{if .Content}}{{.Content}}{{end}} {{if .Content}}{{.Content}}{{end}}
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
{{ if .FunctionCall }}</tool_call>{{end}}
{{ if eq .RoleName "tool" }}</tool_result>{{end}}
<|im_end|> <|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input}}
<|im_start|>assistant
<tool_call>
chat: | chat: |
{{.Input}} {{.Input}}
<|im_start|>assistant <|im_start|>assistant
completion: | completion: |
{{.Input}} {{.Input}}
context_size: 2048 context_size: 4096
f16: true f16: true
stopwords: stopwords:
- <|im_end|> - <|im_end|>
- <dummy32000> - <dummy32000>
usage: | usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-chat", "model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}' }'

View File

@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
input.Grammar = grammar.JSONBNF input.Grammar = grammar.JSONBNF
} }
config.Grammar = input.Grammar
// process functions if we have any defined or if we have a function call string // process functions if we have any defined or if we have a function call string
if len(input.Functions) > 0 && config.ShouldUseFunctions() { if len(input.Functions) > 0 && config.ShouldUseFunctions() {
log.Debug().Msgf("Response needs to process functions") log.Debug().Msgf("Response needs to process functions")

View File

@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
input.Grammar = grammar.JSONBNF input.Grammar = grammar.JSONBNF
} }
config.Grammar = input.Grammar
log.Debug().Msgf("Parameter Config: %+v", config) log.Debug().Msgf("Parameter Config: %+v", config)
if input.Stream { if input.Stream {

View File

@ -0,0 +1,30 @@
name: phi-2-chat
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
description: |
This model is a chatbot that can be used for general conversation.
[Model card](https://huggingface.co/l3utterfly/phi-2-orange-GGUF)
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-chat",
"messages": [{"role": "user", "content": "How are you doing?"}],
"temperature": 0.1
}'

View File

@ -2,6 +2,7 @@ package e2e_test
import ( import (
"context" "context"
"encoding/json"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@ -9,8 +10,8 @@ import (
. "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega" . "github.com/onsi/gomega"
"github.com/sashabaranov/go-openai" "github.com/sashabaranov/go-openai"
"github.com/sashabaranov/go-openai/jsonschema"
) )
var _ = Describe("E2E test", func() { var _ = Describe("E2E test", func() {
@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
}) })
}) })
// Function calling: a single user question plus one declared tool should
// yield exactly one tool call naming that tool, with the city in its arguments.
Context("function calls", func() {
	It("correctly invoke", func() {
		// The only tool offered to the model: a weather lookup whose
		// JSON-schema parameters require "location" and optionally accept "unit".
		weatherTool := openai.Tool{
			Type: openai.ToolTypeFunction,
			Function: &openai.FunctionDefinition{
				Name:        "get_current_weather",
				Description: "Get the current weather in a given location",
				Parameters: jsonschema.Definition{
					Type: jsonschema.Object,
					Properties: map[string]jsonschema.Definition{
						"location": {
							Type:        jsonschema.String,
							Description: "The city and state, e.g. San Francisco, CA",
						},
						"unit": {
							Type: jsonschema.String,
							Enum: []string{"celsius", "fahrenheit"},
						},
					},
					Required: []string{"location"},
				},
			},
		}

		request := openai.ChatCompletionRequest{
			Model: openai.GPT4,
			Messages: []openai.ChatCompletionMessage{
				{Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
			},
			Tools: []openai.Tool{weatherTool},
		}

		resp, err := client.CreateChatCompletion(context.TODO(), request)
		Expect(err).ToNot(HaveOccurred())
		Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))

		// Exactly one tool call is expected; inspect the function it targets.
		toolCalls := resp.Choices[0].Message.ToolCalls
		Expect(len(toolCalls)).To(Equal(1), fmt.Sprint(toolCalls))

		invoked := toolCalls[0].Function
		Expect(invoked.Name).To(Equal("get_current_weather"), fmt.Sprint(invoked.Name))
		Expect(invoked.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(invoked.Arguments))
	})
})
// JSON mode: requesting the json_object response format must produce a
// message body that parses as a JSON object carrying the requested fields.
Context("json", func() {
	It("correctly", func() {
		resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{
			Model: "gpt-4",
			ResponseFormat: &openai.ChatCompletionResponseFormat{
				Type: openai.ChatCompletionResponseFormatTypeJSONObject,
			},
			Messages: []openai.ChatCompletionMessage{
				{
					Role:    "user",
					Content: "An animal with 'name', 'gender' and 'legs' fields",
				},
			},
		})
		Expect(err).ToNot(HaveOccurred())
		Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))

		// The reply must decode into a JSON object.
		var animal map[string]interface{}
		err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &animal)
		Expect(err).ToNot(HaveOccurred())

		// Every field named in the prompt has to be present as a key.
		for _, field := range []string{"name", "gender", "legs"} {
			Expect(animal).To(HaveKey(field))
		}
	})
})
Context("images", func() { Context("images", func() {
It("correctly", func() { It("correctly", func() {
resp, err := client.CreateImage(context.TODO(), resp, err := client.CreateImage(context.TODO(),