feat(templates): extract text from multimodal requests (#3866)

When offloading template construction to the backend, we want to keep
text around in case of multimodal requests.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-10-17 17:33:50 +02:00 committed by GitHub
parent 9db068388b
commit d5da8c3509
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,6 +2,7 @@ package backend
import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
@ -77,6 +78,16 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
switch ct := message.Content.(type) {
case string:
protoMessages[i].Content = ct
case []interface{}:
// If using the tokenizer template, in case of multimodal we want to keep the multimodal content as and return only strings here
data, _ := json.Marshal(ct)
resultData := []struct {
Text string `json:"text"`
}{}
json.Unmarshal(data, &resultData)
for _, r := range resultData {
protoMessages[i].Content += r.Text
}
default:
return nil, fmt.Errorf("unsupported type for schema.Message.Content for inference: %T", ct)
}