mirror of
https://github.com/mudler/LocalAI.git
synced 2025-01-11 23:42:48 +00:00
beb598e4f9
feat(functions): support mixed JSON BNF grammar This PR provides new options to control how functions are extracted from the LLM, and also provides more control on how JSON grammars can be used (also in conjunction). New YAML settings introduced: - `grammar_message`: when enabled, the generated grammar can also decide to push strings and not only JSON objects. This allows the LLM to pick to either respond freely or using JSON. - `grammar_prefix`: Allows to prefix a string to the JSON grammar definition. - `replace_results`: Is a map that allows to replace strings in the LLM result. As an example, consider the following settings for Hermes-2-Pro-Mistral, which allow extracting both JSON results coming from the model, and the ones coming from the grammar: ```yaml function: # disable injecting the "answer" tool disable_no_action: true # This allows the grammar to also return messages grammar_message: true # Suffix to add to the grammar grammar_prefix: '<tool_call>\n' return_name_in_function_response: true # Without grammar uncomment the lines below # Warning: this is relying only on the capability of the # LLM model to generate the correct function call. # no_grammar: true # json_regex_match: "(?s)<tool_call>(.*?)</tool_call>" replace_results: "<tool_call>": "" "\'": "\"" ``` Note: To disable entirely grammars usage in the example above, uncomment the `no_grammar` and `json_regex_match`. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
612 lines
20 KiB
Go
612 lines
20 KiB
Go
package openai
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-skynet/LocalAI/core/backend"
|
|
"github.com/go-skynet/LocalAI/core/config"
|
|
"github.com/go-skynet/LocalAI/core/schema"
|
|
"github.com/go-skynet/LocalAI/pkg/functions"
|
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
|
"github.com/gofiber/fiber/v2"
|
|
"github.com/google/uuid"
|
|
"github.com/rs/zerolog/log"
|
|
"github.com/valyala/fasthttp"
|
|
)
|
|
|
|
// ChatEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
|
|
// @Summary Generate a chat completions for a given prompt and model.
|
|
// @Param request body schema.OpenAIRequest true "query params"
|
|
// @Success 200 {object} schema.OpenAIResponse "Response"
|
|
// @Router /v1/chat/completions [post]
|
|
func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
|
emptyMessage := ""
|
|
id := uuid.New().String()
|
|
created := int(time.Now().Unix())
|
|
|
|
process := func(s string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
|
initialMessage := schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
|
Object: "chat.completion.chunk",
|
|
}
|
|
responses <- initialMessage
|
|
|
|
ComputeChoices(req, s, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
|
resp := schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
|
|
Object: "chat.completion.chunk",
|
|
Usage: schema.OpenAIUsage{
|
|
PromptTokens: usage.Prompt,
|
|
CompletionTokens: usage.Completion,
|
|
TotalTokens: usage.Prompt + usage.Completion,
|
|
},
|
|
}
|
|
|
|
responses <- resp
|
|
return true
|
|
})
|
|
close(responses)
|
|
}
|
|
processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.BackendConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
|
|
result := ""
|
|
_, tokenUsage, _ := ComputeChoices(req, prompt, config, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
|
|
result += s
|
|
// TODO: Change generated BNF grammar to be compliant with the schema so we can
|
|
// stream the result token by token here.
|
|
return true
|
|
})
|
|
|
|
results := functions.ParseFunctionCall(result, config.FunctionsConfig)
|
|
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
|
|
|
|
switch {
|
|
case noActionToRun:
|
|
initialMessage := schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
|
|
Object: "chat.completion.chunk",
|
|
}
|
|
responses <- initialMessage
|
|
|
|
result, err := handleQuestion(config, req, ml, startupOptions, results, result, prompt)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("error handling question")
|
|
return
|
|
}
|
|
|
|
resp := schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{Delta: &schema.Message{Content: &result}, Index: 0}},
|
|
Object: "chat.completion.chunk",
|
|
Usage: schema.OpenAIUsage{
|
|
PromptTokens: tokenUsage.Prompt,
|
|
CompletionTokens: tokenUsage.Completion,
|
|
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
|
},
|
|
}
|
|
|
|
responses <- resp
|
|
|
|
default:
|
|
for i, ss := range results {
|
|
name, args := ss.Name, ss.Arguments
|
|
|
|
initialMessage := schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{
|
|
Delta: &schema.Message{
|
|
Role: "assistant",
|
|
ToolCalls: []schema.ToolCall{
|
|
{
|
|
Index: i,
|
|
ID: id,
|
|
Type: "function",
|
|
FunctionCall: schema.FunctionCall{
|
|
Name: name,
|
|
},
|
|
},
|
|
},
|
|
}}},
|
|
Object: "chat.completion.chunk",
|
|
}
|
|
responses <- initialMessage
|
|
|
|
responses <- schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{{
|
|
Delta: &schema.Message{
|
|
Role: "assistant",
|
|
ToolCalls: []schema.ToolCall{
|
|
{
|
|
Index: i,
|
|
ID: id,
|
|
Type: "function",
|
|
FunctionCall: schema.FunctionCall{
|
|
Arguments: args,
|
|
},
|
|
},
|
|
},
|
|
}}},
|
|
Object: "chat.completion.chunk",
|
|
}
|
|
}
|
|
}
|
|
|
|
close(responses)
|
|
}
|
|
|
|
return func(c *fiber.Ctx) error {
|
|
modelFile, input, err := readRequest(c, ml, startupOptions, true)
|
|
if err != nil {
|
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
|
}
|
|
|
|
config, input, err := mergeRequestWithConfig(modelFile, input, cl, ml, startupOptions.Debug, startupOptions.Threads, startupOptions.ContextSize, startupOptions.F16)
|
|
if err != nil {
|
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
|
}
|
|
log.Debug().Msgf("Configuration read: %+v", config)
|
|
|
|
funcs := input.Functions
|
|
shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions()
|
|
|
|
// Allow the user to set custom actions via config file
|
|
// to be "embedded" in each model
|
|
noActionName := "answer"
|
|
noActionDescription := "use this action to answer without performing any action"
|
|
|
|
if config.FunctionsConfig.NoActionFunctionName != "" {
|
|
noActionName = config.FunctionsConfig.NoActionFunctionName
|
|
}
|
|
if config.FunctionsConfig.NoActionDescriptionName != "" {
|
|
noActionDescription = config.FunctionsConfig.NoActionDescriptionName
|
|
}
|
|
|
|
if input.ResponseFormat.Type == "json_object" {
|
|
input.Grammar = functions.JSONBNF
|
|
}
|
|
|
|
config.Grammar = input.Grammar
|
|
|
|
if shouldUseFn {
|
|
log.Debug().Msgf("Response needs to process functions")
|
|
}
|
|
|
|
switch {
|
|
case !config.FunctionsConfig.NoGrammar && shouldUseFn:
|
|
noActionGrammar := functions.Function{
|
|
Name: noActionName,
|
|
Description: noActionDescription,
|
|
Parameters: map[string]interface{}{
|
|
"properties": map[string]interface{}{
|
|
"message": map[string]interface{}{
|
|
"type": "string",
|
|
"description": "The message to reply the user with",
|
|
}},
|
|
},
|
|
}
|
|
|
|
// Append the no action function
|
|
if !config.FunctionsConfig.DisableNoAction {
|
|
funcs = append(funcs, noActionGrammar)
|
|
}
|
|
|
|
// Force picking one of the functions by the request
|
|
if config.FunctionToCall() != "" {
|
|
funcs = funcs.Select(config.FunctionToCall())
|
|
}
|
|
|
|
// Update input grammar
|
|
// Handle if we should return "name" instead of "functions"
|
|
if config.FunctionsConfig.FunctionName {
|
|
jsStruct := funcs.ToJSONNameStructure()
|
|
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
|
|
} else {
|
|
jsStruct := funcs.ToJSONFunctionStructure()
|
|
config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
|
|
}
|
|
case input.JSONFunctionGrammarObject != nil:
|
|
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
|
|
case input.JSONFunctionGrammarObjectName != nil:
|
|
config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarPrefix, "", config.FunctionsConfig.ParallelCalls, config.FunctionsConfig.GrammarMessage)
|
|
default:
|
|
// Force picking one of the functions by the request
|
|
if config.FunctionToCall() != "" {
|
|
funcs = funcs.Select(config.FunctionToCall())
|
|
}
|
|
}
|
|
|
|
// process functions if we have any defined or if we have a function call string
|
|
|
|
// functions are not supported in stream mode (yet?)
|
|
toStream := input.Stream
|
|
|
|
log.Debug().Msgf("Parameters: %+v", config)
|
|
|
|
var predInput string
|
|
|
|
// If we are using the tokenizer template, we don't need to process the messages
|
|
// unless we are processing functions
|
|
if !config.TemplateConfig.UseTokenizerTemplate || shouldUseFn {
|
|
suppressConfigSystemPrompt := false
|
|
mess := []string{}
|
|
for messageIndex, i := range input.Messages {
|
|
var content string
|
|
role := i.Role
|
|
|
|
// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
|
|
// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
|
|
if (i.FunctionCall != nil || i.ToolCalls != nil) && i.Role == "assistant" {
|
|
roleFn := "assistant_function_call"
|
|
r := config.Roles[roleFn]
|
|
if r != "" {
|
|
role = roleFn
|
|
}
|
|
}
|
|
r := config.Roles[role]
|
|
contentExists := i.Content != nil && i.StringContent != ""
|
|
|
|
fcall := i.FunctionCall
|
|
if len(i.ToolCalls) > 0 {
|
|
fcall = i.ToolCalls
|
|
}
|
|
|
|
// First attempt to populate content via a chat message specific template
|
|
if config.TemplateConfig.ChatMessage != "" {
|
|
chatMessageData := model.ChatMessageTemplateData{
|
|
SystemPrompt: config.SystemPrompt,
|
|
Role: r,
|
|
RoleName: role,
|
|
Content: i.StringContent,
|
|
FunctionCall: fcall,
|
|
FunctionName: i.Name,
|
|
LastMessage: messageIndex == (len(input.Messages) - 1),
|
|
Function: config.Grammar != "" && (messageIndex == (len(input.Messages) - 1)),
|
|
MessageIndex: messageIndex,
|
|
}
|
|
templatedChatMessage, err := ml.EvaluateTemplateForChatMessage(config.TemplateConfig.ChatMessage, chatMessageData)
|
|
if err != nil {
|
|
log.Error().Err(err).Interface("message", chatMessageData).Str("template", config.TemplateConfig.ChatMessage).Msg("error processing message with template, skipping")
|
|
} else {
|
|
if templatedChatMessage == "" {
|
|
log.Warn().Msgf("template \"%s\" produced blank output for %+v. Skipping!", config.TemplateConfig.ChatMessage, chatMessageData)
|
|
continue // TODO: This continue is here intentionally to skip over the line `mess = append(mess, content)` below, and to prevent the sprintf
|
|
}
|
|
log.Debug().Msgf("templated message for chat: %s", templatedChatMessage)
|
|
content = templatedChatMessage
|
|
}
|
|
}
|
|
|
|
marshalAnyRole := func(f any) {
|
|
j, err := json.Marshal(f)
|
|
if err == nil {
|
|
if contentExists {
|
|
content += "\n" + fmt.Sprint(r, " ", string(j))
|
|
} else {
|
|
content = fmt.Sprint(r, " ", string(j))
|
|
}
|
|
}
|
|
}
|
|
marshalAny := func(f any) {
|
|
j, err := json.Marshal(f)
|
|
if err == nil {
|
|
if contentExists {
|
|
content += "\n" + string(j)
|
|
} else {
|
|
content = string(j)
|
|
}
|
|
}
|
|
}
|
|
// If this model doesn't have such a template, or if that template fails to return a value, template at the message level.
|
|
if content == "" {
|
|
if r != "" {
|
|
if contentExists {
|
|
content = fmt.Sprint(r, i.StringContent)
|
|
}
|
|
|
|
if i.FunctionCall != nil {
|
|
marshalAnyRole(i.FunctionCall)
|
|
}
|
|
if i.ToolCalls != nil {
|
|
marshalAnyRole(i.ToolCalls)
|
|
}
|
|
} else {
|
|
if contentExists {
|
|
content = fmt.Sprint(i.StringContent)
|
|
}
|
|
if i.FunctionCall != nil {
|
|
marshalAny(i.FunctionCall)
|
|
}
|
|
if i.ToolCalls != nil {
|
|
marshalAny(i.ToolCalls)
|
|
}
|
|
}
|
|
// Special Handling: System. We care if it was printed at all, not the r branch, so check seperately
|
|
if contentExists && role == "system" {
|
|
suppressConfigSystemPrompt = true
|
|
}
|
|
}
|
|
|
|
mess = append(mess, content)
|
|
}
|
|
|
|
joinCharacter := "\n"
|
|
if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
|
|
joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
|
|
}
|
|
|
|
predInput = strings.Join(mess, joinCharacter)
|
|
log.Debug().Msgf("Prompt (before templating): %s", predInput)
|
|
|
|
templateFile := ""
|
|
|
|
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
|
if ml.ExistsInModelPath(fmt.Sprintf("%s.tmpl", config.Model)) {
|
|
templateFile = config.Model
|
|
}
|
|
|
|
if config.TemplateConfig.Chat != "" && !shouldUseFn {
|
|
templateFile = config.TemplateConfig.Chat
|
|
}
|
|
|
|
if config.TemplateConfig.Functions != "" && shouldUseFn {
|
|
templateFile = config.TemplateConfig.Functions
|
|
}
|
|
|
|
if templateFile != "" {
|
|
templatedInput, err := ml.EvaluateTemplateForPrompt(model.ChatPromptTemplate, templateFile, model.PromptTemplateData{
|
|
SystemPrompt: config.SystemPrompt,
|
|
SuppressSystemPrompt: suppressConfigSystemPrompt,
|
|
Input: predInput,
|
|
Functions: funcs,
|
|
})
|
|
if err == nil {
|
|
predInput = templatedInput
|
|
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
|
} else {
|
|
log.Debug().Msgf("Template failed loading: %s", err.Error())
|
|
}
|
|
}
|
|
|
|
log.Debug().Msgf("Prompt (after templating): %s", predInput)
|
|
if shouldUseFn && config.Grammar != "" {
|
|
log.Debug().Msgf("Grammar: %+v", config.Grammar)
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case toStream:
|
|
|
|
log.Debug().Msgf("Stream request received")
|
|
c.Context().SetContentType("text/event-stream")
|
|
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
|
|
// c.Set("Content-Type", "text/event-stream")
|
|
c.Set("Cache-Control", "no-cache")
|
|
c.Set("Connection", "keep-alive")
|
|
c.Set("Transfer-Encoding", "chunked")
|
|
|
|
responses := make(chan schema.OpenAIResponse)
|
|
|
|
if !shouldUseFn {
|
|
go process(predInput, input, config, ml, responses)
|
|
} else {
|
|
go processTools(noActionName, predInput, input, config, ml, responses)
|
|
}
|
|
|
|
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
|
usage := &schema.OpenAIUsage{}
|
|
toolsCalled := false
|
|
for ev := range responses {
|
|
usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
|
|
if len(ev.Choices[0].Delta.ToolCalls) > 0 {
|
|
toolsCalled = true
|
|
}
|
|
var buf bytes.Buffer
|
|
enc := json.NewEncoder(&buf)
|
|
enc.Encode(ev)
|
|
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
|
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
|
|
if err != nil {
|
|
log.Debug().Msgf("Sending chunk failed: %v", err)
|
|
input.Cancel()
|
|
break
|
|
}
|
|
w.Flush()
|
|
}
|
|
|
|
finishReason := "stop"
|
|
if toolsCalled {
|
|
finishReason = "tool_calls"
|
|
} else if toolsCalled && len(input.Tools) == 0 {
|
|
finishReason = "function_call"
|
|
}
|
|
|
|
resp := &schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: []schema.Choice{
|
|
{
|
|
FinishReason: finishReason,
|
|
Index: 0,
|
|
Delta: &schema.Message{Content: &emptyMessage},
|
|
}},
|
|
Object: "chat.completion.chunk",
|
|
Usage: *usage,
|
|
}
|
|
respData, _ := json.Marshal(resp)
|
|
|
|
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
|
|
w.WriteString("data: [DONE]\n\n")
|
|
w.Flush()
|
|
}))
|
|
return nil
|
|
|
|
// no streaming mode
|
|
default:
|
|
result, tokenUsage, err := ComputeChoices(input, predInput, config, startupOptions, ml, func(s string, c *[]schema.Choice) {
|
|
if !shouldUseFn {
|
|
// no function is called, just reply and use stop as finish reason
|
|
*c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
|
|
return
|
|
}
|
|
|
|
results := functions.ParseFunctionCall(s, config.FunctionsConfig)
|
|
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
|
|
|
|
switch {
|
|
case noActionsToRun:
|
|
result, err := handleQuestion(config, input, ml, startupOptions, results, s, predInput)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("error handling question")
|
|
return
|
|
}
|
|
*c = append(*c, schema.Choice{
|
|
Message: &schema.Message{Role: "assistant", Content: &result}})
|
|
default:
|
|
toolChoice := schema.Choice{
|
|
Message: &schema.Message{
|
|
Role: "assistant",
|
|
},
|
|
}
|
|
|
|
if len(input.Tools) > 0 {
|
|
toolChoice.FinishReason = "tool_calls"
|
|
}
|
|
|
|
for _, ss := range results {
|
|
name, args := ss.Name, ss.Arguments
|
|
if len(input.Tools) > 0 {
|
|
// If we are using tools, we condense the function calls into
|
|
// a single response choice with all the tools
|
|
toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
|
|
schema.ToolCall{
|
|
ID: id,
|
|
Type: "function",
|
|
FunctionCall: schema.FunctionCall{
|
|
Name: name,
|
|
Arguments: args,
|
|
},
|
|
},
|
|
)
|
|
} else {
|
|
// otherwise we return more choices directly
|
|
*c = append(*c, schema.Choice{
|
|
FinishReason: "function_call",
|
|
Message: &schema.Message{
|
|
Role: "assistant",
|
|
FunctionCall: map[string]interface{}{
|
|
"name": name,
|
|
"arguments": args,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
}
|
|
|
|
if len(input.Tools) > 0 {
|
|
// we need to append our result if we are using tools
|
|
*c = append(*c, toolChoice)
|
|
}
|
|
}
|
|
|
|
}, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
resp := &schema.OpenAIResponse{
|
|
ID: id,
|
|
Created: created,
|
|
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
Choices: result,
|
|
Object: "chat.completion",
|
|
Usage: schema.OpenAIUsage{
|
|
PromptTokens: tokenUsage.Prompt,
|
|
CompletionTokens: tokenUsage.Completion,
|
|
TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
|
|
},
|
|
}
|
|
respData, _ := json.Marshal(resp)
|
|
log.Debug().Msgf("Response: %s", respData)
|
|
|
|
// Return the prediction in the response body
|
|
return c.JSON(resp)
|
|
}
|
|
}
|
|
}
|
|
|
|
func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, ml *model.ModelLoader, o *config.ApplicationConfig, funcResults []functions.FuncCallResults, result, prompt string) (string, error) {
|
|
|
|
if len(funcResults) == 0 && result != "" {
|
|
log.Debug().Msgf("nothing function results but we had a message from the LLM")
|
|
|
|
return result, nil
|
|
}
|
|
|
|
log.Debug().Msgf("nothing to do, computing a reply")
|
|
arg := ""
|
|
if len(funcResults) > 0 {
|
|
arg = funcResults[0].Arguments
|
|
}
|
|
// If there is a message that the LLM already sends as part of the JSON reply, use it
|
|
arguments := map[string]interface{}{}
|
|
if err := json.Unmarshal([]byte(arg), &arguments); err != nil {
|
|
log.Debug().Msg("handleQuestion: function result did not contain a valid JSON object")
|
|
}
|
|
m, exists := arguments["message"]
|
|
if exists {
|
|
switch message := m.(type) {
|
|
case string:
|
|
if message != "" {
|
|
log.Debug().Msgf("Reply received from LLM: %s", message)
|
|
message = backend.Finetune(*config, prompt, message)
|
|
log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
|
|
|
|
return message, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
|
|
// Otherwise ask the LLM to understand the JSON output and the context, and return a message
|
|
// Note: This costs (in term of CPU/GPU) another computation
|
|
config.Grammar = ""
|
|
images := []string{}
|
|
for _, m := range input.Messages {
|
|
images = append(images, m.StringImages...)
|
|
}
|
|
|
|
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("model inference failed")
|
|
return "", err
|
|
}
|
|
|
|
prediction, err := predFunc()
|
|
if err != nil {
|
|
log.Error().Err(err).Msg("prediction failed")
|
|
return "", err
|
|
}
|
|
return backend.Finetune(*config, prompt, prediction.Response), nil
|
|
}
|