diff --git a/core/backend/tokenize.go b/core/backend/tokenize.go
new file mode 100644
index 00000000..3c78b17f
--- /dev/null
+++ b/core/backend/tokenize.go
@@ -0,0 +1,52 @@
+package backend
+
+import (
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/grpc"
+	model "github.com/mudler/LocalAI/pkg/model"
+)
+
+// ModelTokenize loads the backend for the given configuration and asks it
+// to tokenize the input string s.
+func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
+	modelFile := backendConfig.Model
+
+	grpcOpts := GRPCModelOpts(backendConfig)
+
+	var inferenceModel grpc.Backend
+	var err error
+
+	opts := modelOpts(backendConfig, appConfig, []model.Option{
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
+		model.WithThreads(uint32(*backendConfig.Threads)),
+		model.WithAssetDir(appConfig.AssetsDestination),
+		model.WithModel(modelFile),
+		model.WithContext(appConfig.Context),
+	})
+
+	// Use the configured backend if one is set, otherwise let the greedy
+	// loader probe the available backends.
+	if backendConfig.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(opts...)
+	} else {
+		opts = append(opts, model.WithBackendString(backendConfig.Backend))
+		inferenceModel, err = loader.BackendLoader(opts...)
+	}
+	if err != nil {
+		return schema.TokenizeResponse{}, err
+	}
+
+	predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
+	predictOptions.Prompt = s
+
+	// tokenize the string
+	resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
+	if err != nil {
+		return schema.TokenizeResponse{}, err
+	}
+
+	return schema.TokenizeResponse{
+		Tokens: resp.Tokens,
+	}, nil
+}
diff --git a/core/http/endpoints/localai/tokenize.go b/core/http/endpoints/localai/tokenize.go
new file mode 100644
index 00000000..da110bf8
--- /dev/null
+++ b/core/http/endpoints/localai/tokenize.go
@@ -0,0 +1,55 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/backend"
+	"github.com/mudler/LocalAI/core/config"
+	fiberContext "github.com/mudler/LocalAI/core/http/ctx"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/model"
+	"github.com/rs/zerolog/log"
+)
+
+// TokenizeEndpoint exposes a REST API to tokenize the content
+// @Summary Tokenize the input.
+// @Success 200 {object} schema.TokenizeResponse "Response"
+// @Router /v1/tokenize [post]
+func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		input := new(schema.TokenizeRequest)
+
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		// Resolve the model from the request context, falling back to the
+		// model named in the request body.
+		modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+
+		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
+			config.LoadOptionDebug(appConfig.Debug),
+			config.LoadOptionThreads(appConfig.Threads),
+			config.LoadOptionContextSize(appConfig.ContextSize),
+			config.LoadOptionF16(appConfig.F16),
+		)
+		if err != nil {
+			// Without a backend config we cannot tokenize; bail out instead
+			// of dereferencing a nil config below.
+			log.Err(err).Msgf("Failed to load backend config for model: %s", modelFile)
+			return err
+		}
+		log.Debug().Msgf("Request for model: %s", cfg.Model)
+
+		tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
+		if err != nil {
+			return err
+		}
+
+		return c.JSON(tokenResponse)
+	}
+}
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 2f65e779..f2f0dfa4 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -63,4 +63,7 @@ func RegisterLocalAIRoutes(app *fiber.App,
 
 	app.Get("/system", localai.SystemInformations(ml, appConfig))
 
+	// misc
+	app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
+
 }
diff --git a/core/schema/tokenize.go b/core/schema/tokenize.go
new file mode 100644
index 00000000..3770cc5a
--- /dev/null
+++ b/core/schema/tokenize.go
@@ -0,0 +1,10 @@
+package schema
+
+type TokenizeRequest struct {
+	Content string `json:"content"`
+	Model   string `json:"model"`
+}
+
+type TokenizeResponse struct {
+	Tokens []int32 `json:"tokens"`
+}
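A minimal sketch of exercising the new `/v1/tokenize` endpoint from Go, assuming a LocalAI instance listening on localhost:8080 and a placeholder model name (both hypothetical; adjust to your deployment):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Build the JSON body matching schema.TokenizeRequest; "my-model" is a
	// placeholder for whatever model your instance actually serves.
	payload, err := json.Marshal(map[string]string{
		"model":   "my-model",
		"content": "Hello, LocalAI!",
	})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:8080/v1/tokenize",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Decode the response, which mirrors schema.TokenizeResponse:
	// {"tokens": [...]}
	var out struct {
		Tokens []int32 `json:"tokens"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.Tokens)
}
```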