From 35290e146b8b575cd691c844dd611ead3c111c0b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 31 Mar 2024 13:04:09 +0200 Subject: [PATCH] fix(grammar): respect JSONmode and grammar from user input (#1935) * fix(grammar): Fix JSON mode and custom grammar * tests(aio): add jsonmode test * tests(aio): add functioncall test * fix(aio): use hermes-2-pro-mistral as llm for CPU profile * add phi-2-orange --- aio/cpu/text-to-text.yaml | 31 ++++++++-- core/http/endpoints/openai/chat.go | 2 + core/http/endpoints/openai/completion.go | 2 + embedded/models/phi-2-orange.yaml | 30 +++++++++ tests/e2e-aio/e2e_test.go | 79 +++++++++++++++++++++++- 5 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 embedded/models/phi-2-orange.yaml diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml index 4fd88500..aeb3c842 100644 --- a/aio/cpu/text-to-text.yaml +++ b/aio/cpu/text-to-text.yaml @@ -1,25 +1,48 @@ name: gpt-4 mmap: true parameters: - model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf template: chat_message: | - <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} {{if .Content}}{{.Content}}{{end}} + {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}} + {{ if .FunctionCall }}{{end}} + {{ if eq .RoleName "tool" }}{{end}} <|im_end|> + # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling + function: | + <|im_start|>system + You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.
You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: + <tools> + {{range .Functions}} + {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} + {{end}} + </tools> + Use the following pydantic model json schema for each tool call you will make: + {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} + For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows: + <tool_call> + {'arguments': <args-dict>, 'name': <function-name>} + </tool_call><|im_end|> + {{.Input}} + <|im_start|>assistant + chat: | {{.Input}} <|im_start|>assistant completion: | {{.Input}} -context_size: 2048 +context_size: 4096 f16: true stopwords: - <|im_end|> - usage: | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ - "model": "phi-2-chat", + "model": "gpt-4", "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] }' diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index f5f03eb4..837b6e12 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + // process functions if we have any defined or if we have a function call string if len(input.Functions) > 0 && config.ShouldUseFunctions() { log.Debug().Msgf("Response needs to process functions") diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index a67f0993..69923475 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -73,6 +73,8 @@ func
CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a input.Grammar = grammar.JSONBNF } + config.Grammar = input.Grammar + log.Debug().Msgf("Parameter Config: %+v", config) if input.Stream { diff --git a/embedded/models/phi-2-orange.yaml b/embedded/models/phi-2-orange.yaml new file mode 100644 index 00000000..9207d283 --- /dev/null +++ b/embedded/models/phi-2-orange.yaml @@ -0,0 +1,30 @@ +name: phi-2-chat +mmap: true +parameters: + model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- + +description: | + This model is a chatbot that can be used for general conversation. + [Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF) + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2-chat", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index c52d789e..8fcd1280 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -2,6 +2,7 @@ package e2e_test import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -9,8 +10,8 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - "github.com/sashabaranov/go-openai" + "github.com/sashabaranov/go-openai/jsonschema" ) var _ = Describe("E2E test", func() { @@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() { Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content)) }) }) + + Context("function calls", func() { + It("correctly invoke", func() { + params := jsonschema.Definition{ + Type: jsonschema.Object, + Properties: map[string]jsonschema.Definition{ + "location": { + Type: jsonschema.String, + Description: "The city and state, e.g. San Francisco, CA", + }, + "unit": { + Type: jsonschema.String, + Enum: []string{"celsius", "fahrenheit"}, + }, + }, + Required: []string{"location"}, + } + + f := openai.FunctionDefinition{ + Name: "get_current_weather", + Description: "Get the current weather in a given location", + Parameters: params, + } + t := openai.Tool{ + Type: openai.ToolTypeFunction, + Function: &f, + } + + dialogue := []openai.ChatCompletionMessage{ + {Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"}, + } + resp, err := client.CreateChatCompletion(context.TODO(), + openai.ChatCompletionRequest{ + Model: openai.GPT4, + Messages: dialogue, + Tools: []openai.Tool{t}, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + msg := resp.Choices[0].Message + Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls)) + Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name)) + Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments)) + }) + }) + Context("json", func() { + It("correctly", func() { + model := "gpt-4" + + req := openai.ChatCompletionRequest{ + ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject}, + 
Model: model, + Messages: []openai.ChatCompletionMessage{ + { + + Role: "user", + Content: "An animal with 'name', 'gender' and 'legs' fields", + }, + }, + } + + resp, err := client.CreateChatCompletion(context.TODO(), req) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp)) + + var i map[string]interface{} + err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i) + Expect(err).ToNot(HaveOccurred()) + Expect(i).To(HaveKey("name")) + Expect(i).To(HaveKey("gender")) + Expect(i).To(HaveKey("legs")) + }) + }) + Context("images", func() { It("correctly", func() { resp, err := client.CreateImage(context.TODO(),