From d5da8c3509d1e23d1ebcf82a4c9d9964eb1b549a Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 17 Oct 2024 17:33:50 +0200
Subject: [PATCH] feat(templates): extract text from multimodal requests
 (#3866)

When offloading template construction to the backend, we want to keep
the text around in case of multimodal requests.

Signed-off-by: Ettore Di Giacinto
---
 core/backend/llm.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/core/backend/llm.go b/core/backend/llm.go
index d946d3f8..199a6233 100644
--- a/core/backend/llm.go
+++ b/core/backend/llm.go
@@ -2,6 +2,7 @@ package backend
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"regexp"
@@ -77,6 +78,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 		switch ct := message.Content.(type) {
 		case string:
 			protoMessages[i].Content = ct
+		case []interface{}:
+			// If using the tokenizer template, in case of multimodal requests we want to keep the
+			// multimodal content as-is and return only the text strings here
+			data, _ := json.Marshal(ct)
+			resultData := []struct {
+				Text string `json:"text"`
+			}{}
+			json.Unmarshal(data, &resultData)
+			for _, r := range resultData {
+				protoMessages[i].Content += r.Text
+			}
 		default:
 			return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
 		}
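
For context, a minimal standalone sketch of the extraction technique the new []interface{} case relies on: marshal the content parts back to JSON, unmarshal them into a slice of structs that only capture the "text" field, and concatenate the results. This is illustrative only and not part of the patch; the helper name extractText and the sample payload are hypothetical, assuming OpenAI-style multimodal content parts.

package main

import (
	"encoding/json"
	"fmt"
)

// extractText concatenates the "text" fields of a multimodal content array.
// Parts without a "text" field (e.g. image parts) contribute an empty string.
func extractText(content []interface{}) string {
	data, err := json.Marshal(content)
	if err != nil {
		return ""
	}
	parts := []struct {
		Text string `json:"text"`
	}{}
	if err := json.Unmarshal(data, &parts); err != nil {
		return ""
	}
	out := ""
	for _, p := range parts {
		out += p.Text
	}
	return out
}

func main() {
	// Hypothetical message content mixing a text part and an image part.
	content := []interface{}{
		map[string]interface{}{"type": "text", "text": "Describe this image."},
		map[string]interface{}{"type": "image_url", "image_url": map[string]interface{}{"url": "https://example.com/cat.png"}},
	}
	fmt.Println(extractText(content)) // prints: Describe this image.
}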