fix(grammar): respect JSON mode and grammar from user input (#1935)

* fix(grammar): Fix JSON mode and custom grammar
* tests(aio): add jsonmode test
* tests(aio): add functioncall test
* fix(aio): use hermes-2-pro-mistral as llm for CPU profile
* add phi-2-orange
2024-12-24 06:46:39 +00:00 · 2024-03-31 13:04:09 +02:00 · 2024-03-31 13:04:09 +02:00 · 35290e146b
commit 35290e146b
parent 784657a652
5 changed files with 139 additions and 5 deletions
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@ -1,25 +1,48 @@
 name: gpt-4
 mmap: true
 parameters:
-  model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
+  model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
 template:
  chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
    {{ if .FunctionCall }}<tool_call>{{end}}
    {{ if eq .RoleName "tool" }}<tool_result>{{end}}
    {{if .Content}}{{.Content}}{{end}}
    {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
    {{ if .FunctionCall }}</tool_call>{{end}}
    {{ if eq .RoleName "tool" }}</tool_result>{{end}}
    <|im_end|>
  # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
  function: |
    <|im_start|>system
    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
    <tools>
    {{range .Functions}}
    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
    {{end}}
    </tools>
    Use the following pydantic model json schema for each tool call you will make:
    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
    For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
    <tool_call>
    {'arguments': <args-dict>, 'name': <function-name>}
    </tool_call><|im_end|>
    {{.Input}}
    <|im_start|>assistant
    <tool_call>
  chat: |
    {{.Input}}
    <|im_start|>assistant
  completion: |
    {{.Input}}
-context_size: 2048
+context_size: 4096
 f16: true
 stopwords:
 - <|im_end|>
 - <dummy32000>
 usage: |
      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-          "model": "phi-2-chat",
+          "model": "gpt-4",
          "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1
      }'
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			input.Grammar = grammar.JSONBNF
 		}
 		config.Grammar = input.Grammar
 		// process functions if we have any defined or if we have a function call string
 		if len(input.Functions) > 0 && config.ShouldUseFunctions() {
 			log.Debug().Msgf("Response needs to process functions")
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
 			input.Grammar = grammar.JSONBNF
 		}
 		config.Grammar = input.Grammar
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		if input.Stream {
--- a/embedded/models/phi-2-orange.yaml
+++ b/embedded/models/phi-2-orange.yaml
@ -0,0 +1,30 @@
 name: phi-2-chat
 mmap: true
 parameters:
  model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
 template:
  chat_message: |
    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
    {{if .Content}}{{.Content}}{{end}}
    <|im_end|>
  chat: |
    {{.Input}}
    <|im_start|>assistant
  completion: |
    {{.Input}}
 context_size: 4096
 f16: true
 stopwords:
 - <|im_end|>
 - <dummy32000>
 description: |
  This model is a chatbot that can be used for general conversation.
  [Model card](https://huggingface.co/l3utterfly/phi-2-orange-GGUF)
 usage: |
      curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
          "model": "phi-2-chat",
          "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1
      }'
--- a/tests/e2e-aio/e2e_test.go
+++ b/tests/e2e-aio/e2e_test.go
@ -2,6 +2,7 @@ package e2e_test
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@ -9,8 +10,8 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/sashabaranov/go-openai"
 	"github.com/sashabaranov/go-openai/jsonschema"
 )
 var _ = Describe("E2E test", func() {
@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
 				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
 			})
 		})
 		// Function-calling scenario: sends a chat completion request that offers
 		// a single tool and checks the tool call returned in the response.
 		Context("function calls", func() {
 			It("correctly invoke", func() {
 				// JSON schema for the tool's arguments: an object with a required
 				// string `location` and a non-required string `unit` restricted to
 				// "celsius" or "fahrenheit".
 				params := jsonschema.Definition{
 					Type: jsonschema.Object,
 					Properties: map[string]jsonschema.Definition{
 						"location": {
 							Type:        jsonschema.String,
 							Description: "The city and state, e.g. San Francisco, CA",
 						},
 						"unit": {
 							Type: jsonschema.String,
 							Enum: []string{"celsius", "fahrenheit"},
 						},
 					},
 					Required: []string{"location"},
 				}
 				// Function definition advertised to the model.
 				f := openai.FunctionDefinition{
 					Name:        "get_current_weather",
 					Description: "Get the current weather in a given location",
 					Parameters:  params,
 				}
 				// Wrap the function definition as a tool of type "function".
 				t := openai.Tool{
 					Type:     openai.ToolTypeFunction,
 					Function: &f,
 				}
 				// Single user turn whose answer should require the weather tool.
 				dialogue := []openai.ChatCompletionMessage{
 					{Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
 				}
 				resp, err := client.CreateChatCompletion(context.TODO(),
 					openai.ChatCompletionRequest{
 						Model:    openai.GPT4,
 						Messages: dialogue,
 						Tools:    []openai.Tool{t},
 					},
 				)
 				Expect(err).ToNot(HaveOccurred())
 				// The trailing fmt.Sprint(...) arguments below are passed to the
 				// matchers as extra description printed when an assertion fails.
 				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
 				msg := resp.Choices[0].Message
 				// Exactly one tool call is expected, naming the advertised function
 				// and carrying arguments that mention the requested city.
 				Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls))
 				Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name))
 				Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments))
 			})
 		})
 		// JSON-mode scenario: requests a JSON-object response format and checks
 		// that the returned message content parses as a JSON object with the
 		// fields named in the prompt.
 		Context("json", func() {
 			It("correctly", func() {
 				model := "gpt-4"
 				// ResponseFormat asks the server for a JSON object reply.
 				req := openai.ChatCompletionRequest{
 					ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
 					Model:          model,
 					Messages: []openai.ChatCompletionMessage{
 						{
 							Role:    "user",
 							Content: "An animal with 'name', 'gender' and 'legs' fields",
 						},
 					},
 				}
 				resp, err := client.CreateChatCompletion(context.TODO(), req)
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
 				// Decode the reply into a generic map; a decode error means the
 				// content was not a valid JSON object.
 				var i map[string]interface{}
 				err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i)
 				Expect(err).ToNot(HaveOccurred())
 				// Only the presence of the requested keys is checked, not their values.
 				Expect(i).To(HaveKey("name"))
 				Expect(i).To(HaveKey("gender"))
 				Expect(i).To(HaveKey("legs"))
 			})
 		})
 		Context("images", func() {
 			It("correctly", func() {
 				resp, err := client.CreateImage(context.TODO(),