mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-20 05:07:54 +00:00
fix(grammar): respect JSONmode and grammar from user input (#1935)
* fix(grammar): Fix JSON mode and custom grammar
* tests(aio): add jsonmode test
* tests(aio): add functioncall test
* fix(aio): use hermes-2-pro-mistral as llm for CPU profile
* add phi-2-orange
This commit is contained in:
parent
784657a652
commit
35290e146b
@ -1,25 +1,48 @@
|
|||||||
name: gpt-4
|
name: gpt-4
|
||||||
mmap: true
|
mmap: true
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
|
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q2_K.gguf
|
||||||
|
|
||||||
template:
|
template:
|
||||||
chat_message: |
|
chat_message: |
|
||||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
|
||||||
|
{{ if .FunctionCall }}<tool_call>{{end}}
|
||||||
|
{{ if eq .RoleName "tool" }}<tool_result>{{end}}
|
||||||
{{if .Content}}{{.Content}}{{end}}
|
{{if .Content}}{{.Content}}{{end}}
|
||||||
|
{{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
|
||||||
|
{{ if .FunctionCall }}</tool_call>{{end}}
|
||||||
|
{{ if eq .RoleName "tool" }}</tool_result>{{end}}
|
||||||
<|im_end|>
|
<|im_end|>
|
||||||
|
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
|
||||||
|
function: |
|
||||||
|
<|im_start|>system
|
||||||
|
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
|
||||||
|
<tools>
|
||||||
|
{{range .Functions}}
|
||||||
|
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
|
||||||
|
{{end}}
|
||||||
|
</tools>
|
||||||
|
Use the following pydantic model json schema for each tool call you will make:
|
||||||
|
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
|
||||||
|
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
|
||||||
|
<tool_call>
|
||||||
|
{'arguments': <args-dict>, 'name': <function-name>}
|
||||||
|
</tool_call><|im_end|>
|
||||||
|
{{.Input}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
<tool_call>
|
||||||
chat: |
|
chat: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
completion: |
|
completion: |
|
||||||
{{.Input}}
|
{{.Input}}
|
||||||
context_size: 2048
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
stopwords:
|
stopwords:
|
||||||
- <|im_end|>
|
- <|im_end|>
|
||||||
- <dummy32000>
|
- <dummy32000>
|
||||||
usage: |
|
usage: |
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
"model": "phi-2-chat",
|
"model": "gpt-4",
|
||||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||||
}'
|
}'
|
||||||
|
@ -185,6 +185,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
input.Grammar = grammar.JSONBNF
|
input.Grammar = grammar.JSONBNF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
config.Grammar = input.Grammar
|
||||||
|
|
||||||
// process functions if we have any defined or if we have a function call string
|
// process functions if we have any defined or if we have a function call string
|
||||||
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
if len(input.Functions) > 0 && config.ShouldUseFunctions() {
|
||||||
log.Debug().Msgf("Response needs to process functions")
|
log.Debug().Msgf("Response needs to process functions")
|
||||||
|
@ -73,6 +73,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
input.Grammar = grammar.JSONBNF
|
input.Grammar = grammar.JSONBNF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
config.Grammar = input.Grammar
|
||||||
|
|
||||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||||
|
|
||||||
if input.Stream {
|
if input.Stream {
|
||||||
|
30
embedded/models/phi-2-orange.yaml
Normal file
30
embedded/models/phi-2-orange.yaml
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
name: phi-2-chat
|
||||||
|
mmap: true
|
||||||
|
parameters:
|
||||||
|
model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
|
||||||
|
|
||||||
|
template:
|
||||||
|
chat_message: |
|
||||||
|
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||||
|
{{if .Content}}{{.Content}}{{end}}
|
||||||
|
<|im_end|>
|
||||||
|
chat: |
|
||||||
|
{{.Input}}
|
||||||
|
<|im_start|>assistant
|
||||||
|
completion: |
|
||||||
|
{{.Input}}
|
||||||
|
context_size: 4096
|
||||||
|
f16: true
|
||||||
|
stopwords:
|
||||||
|
- <|im_end|>
|
||||||
|
- <dummy32000>
|
||||||
|
|
||||||
|
description: |
|
||||||
|
This model is a chatbot that can be used for general conversation.
|
||||||
|
[Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)
|
||||||
|
|
||||||
|
usage: |
|
||||||
|
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||||
|
"model": "phi-2-chat",
|
||||||
|
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||||
|
}'
|
@ -2,6 +2,7 @@ package e2e_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -9,8 +10,8 @@ import (
|
|||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
. "github.com/onsi/ginkgo/v2"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
|
|
||||||
"github.com/sashabaranov/go-openai"
|
"github.com/sashabaranov/go-openai"
|
||||||
|
"github.com/sashabaranov/go-openai/jsonschema"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ = Describe("E2E test", func() {
|
var _ = Describe("E2E test", func() {
|
||||||
@ -40,6 +41,82 @@ var _ = Describe("E2E test", func() {
|
|||||||
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
|
Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("function calls", func() {
|
||||||
|
It("correctly invoke", func() {
|
||||||
|
params := jsonschema.Definition{
|
||||||
|
Type: jsonschema.Object,
|
||||||
|
Properties: map[string]jsonschema.Definition{
|
||||||
|
"location": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Description: "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
Type: jsonschema.String,
|
||||||
|
Enum: []string{"celsius", "fahrenheit"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Required: []string{"location"},
|
||||||
|
}
|
||||||
|
|
||||||
|
f := openai.FunctionDefinition{
|
||||||
|
Name: "get_current_weather",
|
||||||
|
Description: "Get the current weather in a given location",
|
||||||
|
Parameters: params,
|
||||||
|
}
|
||||||
|
t := openai.Tool{
|
||||||
|
Type: openai.ToolTypeFunction,
|
||||||
|
Function: &f,
|
||||||
|
}
|
||||||
|
|
||||||
|
dialogue := []openai.ChatCompletionMessage{
|
||||||
|
{Role: openai.ChatMessageRoleUser, Content: "What is the weather in Boston today?"},
|
||||||
|
}
|
||||||
|
resp, err := client.CreateChatCompletion(context.TODO(),
|
||||||
|
openai.ChatCompletionRequest{
|
||||||
|
Model: openai.GPT4,
|
||||||
|
Messages: dialogue,
|
||||||
|
Tools: []openai.Tool{t},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
|
||||||
|
|
||||||
|
msg := resp.Choices[0].Message
|
||||||
|
Expect(len(msg.ToolCalls)).To(Equal(1), fmt.Sprint(msg.ToolCalls))
|
||||||
|
Expect(msg.ToolCalls[0].Function.Name).To(Equal("get_current_weather"), fmt.Sprint(msg.ToolCalls[0].Function.Name))
|
||||||
|
Expect(msg.ToolCalls[0].Function.Arguments).To(ContainSubstring("Boston"), fmt.Sprint(msg.ToolCalls[0].Function.Arguments))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
Context("json", func() {
|
||||||
|
It("correctly", func() {
|
||||||
|
model := "gpt-4"
|
||||||
|
|
||||||
|
req := openai.ChatCompletionRequest{
|
||||||
|
ResponseFormat: &openai.ChatCompletionResponseFormat{Type: openai.ChatCompletionResponseFormatTypeJSONObject},
|
||||||
|
Model: model,
|
||||||
|
Messages: []openai.ChatCompletionMessage{
|
||||||
|
{
|
||||||
|
|
||||||
|
Role: "user",
|
||||||
|
Content: "An animal with 'name', 'gender' and 'legs' fields",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.CreateChatCompletion(context.TODO(), req)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(len(resp.Choices)).To(Equal(1), fmt.Sprint(resp))
|
||||||
|
|
||||||
|
var i map[string]interface{}
|
||||||
|
err = json.Unmarshal([]byte(resp.Choices[0].Message.Content), &i)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(i).To(HaveKey("name"))
|
||||||
|
Expect(i).To(HaveKey("gender"))
|
||||||
|
Expect(i).To(HaveKey("legs"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Context("images", func() {
|
Context("images", func() {
|
||||||
It("correctly", func() {
|
It("correctly", func() {
|
||||||
resp, err := client.CreateImage(context.TODO(),
|
resp, err := client.CreateImage(context.TODO(),
|
||||||
|
Loading…
Reference in New Issue
Block a user