diff --git a/aio/cpu/text-to-text.yaml b/aio/cpu/text-to-text.yaml
index 6c4ec9e6..cf18f659 100644
--- a/aio/cpu/text-to-text.yaml
+++ b/aio/cpu/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
-    {{- if .Content}}
-    {{.Content}}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
+    {{- if .Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}</tool_call>{{end }}
-    {{- if eq .RoleName "tool" }}</tool_response>{{end }}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index 8d5c84f7..0407bb22 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
-    {{- if .Content}}
-    {{.Content}}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
+    {{- if .Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}</tool_call>{{end }}
-    {{- if eq .RoleName "tool" }}</tool_response>{{end }}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index a7cb5b4d..f5f93c14 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -7,14 +7,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
-    {{- if .Content}}
-    {{.Content}}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
+    {{- if .Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}</tool_call>{{end }}
-    {{- if eq .RoleName "tool" }}</tool_response>{{end }}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 7bfa9418..dd18ce6f 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -6,14 +6,22 @@ parameters:
 template:
   chat_message: |
     <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
-    {{- if .FunctionCall }}<tool_call>{{end}}
-    {{- if eq .RoleName "tool" }}<tool_response>{{end }}
-    {{- if .Content}}
-    {{.Content}}
+    {{- if .FunctionCall }}
+    <tool_call>
+    {{- else if eq .RoleName "tool" }}
+    <tool_response>
+    {{- end }}
+    {{- if .Content}}
+    {{.Content }}
+    {{- end }}
+    {{- if .FunctionCall}}
+    {{toJson .FunctionCall}}
+    {{- end }}
+    {{- if .FunctionCall }}
+    </tool_call>
+    {{- else if eq .RoleName "tool" }}
+    </tool_response>
     {{- end }}
-    {{- if .FunctionCall}}{{toJson .FunctionCall}}{{end }}
-    {{- if .FunctionCall }}</tool_call>{{end }}
-    {{- if eq .RoleName "tool" }}</tool_response>{{end }}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
diff --git a/embedded/models/llama3-instruct.yaml b/embedded/models/llama3-instruct.yaml
new file mode 100644
index 00000000..d483d2b2
--- /dev/null
+++ b/embedded/models/llama3-instruct.yaml
@@ -0,0 +1,48 @@
+name: llama3-8b-instruct
+mmap: true
+parameters:
+  model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
+
+template:
+  chat_message: |
+    <|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+    {{ if .FunctionCall -}}
+    Function call:
+    {{ else if eq .RoleName "tool" -}}
+    Function response:
+    {{ end -}}
+    {{ if .Content -}}
+    {{.Content -}}
+    {{ else if .FunctionCall -}}
+    {{ toJson .FunctionCall -}}
+    {{ end -}}
+    <|eot_id|>
+  function: |
+    <|start_header_id|>system<|end_header_id|>
+
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
+    <tools>
+    {{range .Functions}}
+    {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
+    {{end}}
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+    Function call:
+  chat: |
+    <|begin_of_text|>{{.Input }}
+    <|start_header_id|>assistant<|end_header_id|>
+  completion: |
+    {{.Input}}
+context_size: 8192
+f16: true
+stopwords:
+- <|im_end|>
+- <dummy32000>
+- "<|eot_id|>"
+usage: |
+    curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+        "model": "llama3-8b-instruct",
+        "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
+    }'
diff --git a/pkg/model/loader_test.go b/pkg/model/loader_test.go
index e4207b35..d3956b63 100644
--- a/pkg/model/loader_test.go
+++ b/pkg/model/loader_test.go
@@ -27,7 +27,84 @@ const chatML = `<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq
 {{- end }}
 <|im_end|>`
 
-var testMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
+
+{{ if .FunctionCall -}}
+Function call:
+{{ else if eq .RoleName "tool" -}}
+Function response:
+{{ end -}}
+{{ if .Content -}}
+{{.Content -}}
+{{ else if .FunctionCall -}}
+{{ toJson .FunctionCall -}}
+{{ end -}}
+<|eot_id|>`
+
+var llama3TestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
+	"user": {
+		"template": llama3,
+		"expected": "<|start_header_id|>user<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role:         "user",
+			RoleName:     "user",
+			Content:      "A long time ago in a galaxy far, far away...",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage:  false,
+			Function:     false,
+			MessageIndex: 0,
+		},
+	},
+	"assistant": {
+		"template": llama3,
+		"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nA long time ago in a galaxy far, far away...<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role:         "assistant",
+			RoleName:     "assistant",
+			Content:      "A long time ago in a galaxy far, far away...",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage:  false,
+			Function:     false,
+			MessageIndex: 0,
+		},
+	},
+	"function_call": {
+		"template": llama3,
+		"expected": "<|start_header_id|>assistant<|end_header_id|>\n\nFunction call:\n{\"function\":\"test\"}<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role:         "assistant",
+			RoleName:     "assistant",
+			Content:      "",
+			FunctionCall: map[string]string{"function": "test"},
+			FunctionName: "",
+			LastMessage:  false,
+			Function:     false,
+			MessageIndex: 0,
+		},
+	},
+	"function_response": {
+		"template": llama3,
+		"expected": "<|start_header_id|>tool<|end_header_id|>\n\nFunction response:\nResponse from tool<|eot_id|>",
+		"data": model.ChatMessageTemplateData{
+			SystemPrompt: "",
+			Role:         "tool",
+			RoleName:     "tool",
+			Content:      "Response from tool",
+			FunctionCall: nil,
+			FunctionName: "",
+			LastMessage:  false,
+			Function:     false,
+			MessageIndex: 0,
+		},
+	},
+}
+
+var chatMLTestMatch map[string]map[string]interface{} = map[string]map[string]interface{}{
 	"user": {
 		"template": chatML,
 		"expected": "<|im_start|>user\nA long time ago in a galaxy far, far away...\n<|im_end|>",
@@ -91,13 +168,27 @@ var testMatch map[string]map[string]interface{} = map[string]map[string]interfac
 }
 
 var _ = Describe("Templates", func() {
-	Context("chat message", func() {
+	Context("chat message ChatML", func() {
 		var modelLoader *ModelLoader
 		BeforeEach(func() {
 			modelLoader = NewModelLoader("")
 		})
-		for key := range testMatch {
-			foo := testMatch[key]
+		for key := range chatMLTestMatch {
+			foo := chatMLTestMatch[key]
+			It("renders correctly `"+key+"`", func() {
+				templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
+				Expect(err).ToNot(HaveOccurred())
+				Expect(templated).To(Equal(foo["expected"]), templated)
+			})
+		}
+	})
+	Context("chat message llama3", func() {
+		var modelLoader *ModelLoader
+		BeforeEach(func() {
+			modelLoader = NewModelLoader("")
+		})
+		for key := range llama3TestMatch {
+			foo := llama3TestMatch[key]
 			It("renders correctly `"+key+"`", func() {
 				templated, err := modelLoader.EvaluateTemplateForChatMessage(foo["template"].(string), foo["data"].(model.ChatMessageTemplateData))
 				Expect(err).ToNot(HaveOccurred())
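---
For reference, the new llama3 chat_message template renders with Go's standard
text/template package. Below is a minimal standalone sketch, not the repo's own
evaluator: toJson is hand-rolled here with encoding/json (LocalAI registers its
own via its template function map), and chatMessageData is a trimmed stand-in
for model.ChatMessageTemplateData carrying only the fields the template reads.

package main

import (
	"encoding/json"
	"os"
	"text/template"
)

// chatMessageData is a trimmed stand-in for model.ChatMessageTemplateData:
// only the fields the llama3 template dereferences.
type chatMessageData struct {
	RoleName     string
	Content      string
	FunctionCall map[string]string
}

const llama3 = `<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>

{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>`

func main() {
	// toJson is not a text/template builtin; register a minimal equivalent.
	funcs := template.FuncMap{
		"toJson": func(v interface{}) string {
			b, _ := json.Marshal(v)
			return string(b)
		},
	}
	tmpl := template.Must(template.New("llama3").Funcs(funcs).Parse(llama3))

	// Mirrors the "function_call" case from llama3TestMatch; prints:
	// <|start_header_id|>assistant<|end_header_id|>
	//
	// Function call:
	// {"function":"test"}<|eot_id|>
	if err := tmpl.Execute(os.Stdout, chatMessageData{
		RoleName:     "assistant",
		FunctionCall: map[string]string{"function": "test"},
	}); err != nil {
		panic(err)
	}
}

Run with `go run main.go`; the output matches the "function_call" expectation
asserted above, which is why the `-}}` trim markers matter: they strip the
template's own newlines so only the intended "\n\n" after the header survives.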