Mirror of https://github.com/mudler/LocalAI.git (synced 2025-03-11 06:54:11 +00:00)
feat(tts): respect YAML config file, add sycl docs/examples (#1692)

* feat(refactor): refactor config and input reading
* feat(tts): read the config file for TTS
* examples(kubernetes): add a simple deployment example
* examples(kubernetes): add a simple deployment for Intel Arc
* docs(sycl): add a sycl example
* feat(tts): do not always pick the first model
* fixups to run vall-e-x in the container
* correctly resolve the backend
parent: 081bd07fd1
commit: 53dbe36f32
Dockerfile
@@ -39,7 +39,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     dpkg -i cuda-keyring_1.1-1_all.deb && \
     rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
 
 ENV PATH /usr/local/cuda/bin:${PATH}
api/backend/tts.go
@@ -7,6 +7,7 @@ import (
 	"path/filepath"
 
 	api_config "github.com/go-skynet/LocalAI/api/config"
+	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }
 
-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
 	}
 
+	grpcOpts := gRPCModelOpts(c)
+
 	opts := modelOpts(api_config.Config{}, o, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 		model.WithAssetDir(o.AssetsDestination),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	piperModel, err := o.Loader.BackendLoader(opts...)
 	if err != nil {
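The effect of the ModelTTS change is easier to see outside the diff: the request backend still falls back to piper, but the model's config now contributes load-time options instead of being ignored. A rough, stdlib-only sketch with simplified stand-in types (not LocalAI's types; `buildTTSOpts` plays the role `gRPCModelOpts` plays above):

```go
package main

import "fmt"

// ttsConfig and loadModelOpts are simplified stand-ins for config.Config and
// the gRPC LoadModel options in pkg/grpc/proto.
type ttsConfig struct {
	ContextSize int
	Threads     int
}

type loadModelOpts struct {
	Backend     string
	Model       string
	ContextSize int32
	Threads     int32
}

const piperBackend = "piper" // mirrors model.PiperBackend

// buildTTSOpts mirrors the new ModelTTS flow: default the backend to piper,
// then fold the per-model config into the options the backend receives at
// load time.
func buildTTSOpts(backend, modelFile string, c ttsConfig) loadModelOpts {
	bb := backend
	if bb == "" {
		bb = piperBackend
	}
	return loadModelOpts{
		Backend:     bb,
		Model:       modelFile,
		ContextSize: int32(c.ContextSize),
		Threads:     int32(c.Threads),
	}
}

func main() {
	// An empty backend string still picks piper, now with the YAML config applied.
	fmt.Printf("%+v\n", buildTTSOpts("", "en-us-kathleen-low.onnx", ttsConfig{ContextSize: 512, Threads: 4}))
}
```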
api/config/config.go
@@ -183,6 +183,60 @@ func (c *Config) FunctionToCall() string {
 	return c.functionCallNameString
 }
 
+// Load a config file for a model
+func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(modelPath, modelName+".yaml")
+
+	var cfg *Config
+
+	defaults := func() {
+		cfg = DefaultConfig(modelName)
+		cfg.ContextSize = ctx
+		cfg.Threads = threads
+		cfg.F16 = f16
+		cfg.Debug = debug
+	}
+
+	cfgExisting, exists := cm.GetConfig(modelName)
+	if !exists {
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cm.LoadConfig(modelConfig); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cm.GetConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			} else {
+				defaults()
+			}
+		} else {
+			defaults()
+		}
+	} else {
+		cfg = &cfgExisting
+	}
+
+	// Set the parameters for the language model prediction
+	//updateConfig(cfg, input)
+
+	// Don't allow 0 as setting
+	if cfg.Threads == 0 {
+		if threads != 0 {
+			cfg.Threads = threads
+		} else {
+			cfg.Threads = 4
+		}
+	}
+
+	// Enforce debug flag if passed from CLI
+	if debug {
+		cfg.Debug = true
+	}
+
+	return cfg, nil
+}
+
 func defaultPredictOptions(modelFile string) PredictionOptions {
 	return PredictionOptions{
 		TopP: 0.7,
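Since `Load` silently falls back to defaults when no YAML is found, callers always get a usable config. A condensed, runnable sketch of the precedence it implements (registered config first, then a sidecar `<model>.yaml`, then CLI defaults) with simplified stand-in types and the YAML parsing elided; this is not the LocalAI API:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// modelConfig is a simplified stand-in for config.Config.
type modelConfig struct {
	Name        string
	ContextSize int
	Threads     int
	F16         bool
	Debug       bool
}

// registry stands in for ConfigLoader's in-memory set of known configs.
var registry = map[string]modelConfig{}

func load(modelName, modelPath string, debug bool, threads, ctx int, f16 bool) (*modelConfig, error) {
	// 1. An already-registered config always wins.
	if cfg, ok := registry[modelName]; ok {
		return &cfg, nil
	}

	// 2. Otherwise start from CLI-provided defaults...
	cfg := &modelConfig{Name: modelName, ContextSize: ctx, Threads: threads, F16: f16, Debug: debug}

	// ...and pick up a sidecar "<model>.yaml" if one sits next to the model.
	yamlPath := filepath.Join(modelPath, modelName+".yaml")
	if _, err := os.Stat(yamlPath); err == nil {
		fmt.Println("would parse", yamlPath) // parsing elided; LocalAI delegates it to ConfigLoader.LoadConfig
	}

	// 3. Never run with 0 threads: fall back to the CLI value, then to 4.
	if cfg.Threads == 0 {
		if threads != 0 {
			cfg.Threads = threads
		} else {
			cfg.Threads = 4
		}
	}

	// 4. A debug flag passed on the CLI is always enforced.
	if debug {
		cfg.Debug = true
	}
	return cfg, nil
}

func main() {
	cfg, _ := load("phi-2", "/models", true, 0, 512, false)
	fmt.Printf("%+v\n", *cfg) // Threads floors at 4, Debug stays true
}
```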
api/ctx/fiber.go (new file, 43 lines)
@@ -0,0 +1,43 @@
+package fiberContext
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// ModelFromContext returns the model from the context
+// If no model is specified, it will take the first available
+// Takes a model string as input which should be the one received from the user request.
+// It returns the model name resolved from the context and an error if any.
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+	if ctx.Params("model") != "" {
+		modelInput = ctx.Params("model")
+	}
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelInput == "" && !bearerExists && firstModel {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelInput = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelInput)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return "", fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelInput = bearer
+	}
+	return modelInput, nil
+}
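Stripped of fiber and the model loader, the resolution order ModelFromContext implements is: URL path parameter over request body, then an optional fall-back to the first installed model, with a bearer token naming an installed model trumping everything. A self-contained sketch of that precedence, with stand-in inputs instead of the LocalAI types:

```go
package main

import (
	"errors"
	"fmt"
)

// resolveModel mirrors ModelFromContext's precedence rules using plain values
// instead of *fiber.Ctx and *model.ModelLoader.
func resolveModel(pathParam, bearer, bodyModel string, installed []string, firstModel bool) (string, error) {
	model := bodyModel
	if pathParam != "" {
		model = pathParam // an explicit path segment wins over the request body
	}

	// A bearer token only counts if it names an installed model.
	bearerExists := false
	for _, m := range installed {
		if bearer != "" && m == bearer {
			bearerExists = true
		}
	}

	// No model anywhere: optionally fall back to the first installed one.
	if model == "" && !bearerExists && firstModel {
		if len(installed) == 0 {
			return "", errors.New("no model specified")
		}
		model = installed[0]
	}

	// A matching bearer token takes precedence over everything else.
	if bearerExists {
		model = bearer
	}
	return model, nil
}

func main() {
	m, err := resolveModel("", "my-private-model", "phi-2", []string{"phi-2", "my-private-model"}, true)
	fmt.Println(m, err) // "my-private-model", the bearer token wins
}
```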
api/localai/localai.go
@@ -3,6 +3,8 @@ package localai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
+	"github.com/rs/zerolog/log"
 
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
@@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 
 		input := new(TTSRequest)
 
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
 
-		filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
+		modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+
+		cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+
+		log.Debug().Msgf("Request for model: %s", modelFile)
+
+		if input.Backend != "" {
+			cfg.Backend = input.Backend
+		}
+
+		filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg)
 		if err != nil {
 			return err
 		}
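With the endpoint wired up this way, a TTS request that names only a model now picks up that model's YAML settings server-side. A minimal client sketch; the JSON field names mirror how TTSRequest is used above, and the address and voice file are assumptions:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Model and backend resolution now happen server-side from the model's YAML config.
	body := []byte(`{"model": "en-us-kathleen-low.onnx", "input": "Hello from LocalAI"}`)

	resp, err := http.Post("http://localhost:8080/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	audio, _ := io.ReadAll(resp.Body) // the endpoint returns the generated audio file
	fmt.Printf("status %s, %d bytes of audio\n", resp.Status, len(audio))
}
```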
api/openai/chat.go
@@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 		processFunctions := false
 		funcs := grammar.Functions{}
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/completion.go
@@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
 	}
 
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
 		log.Debug().Msgf("`input`: %+v", input)
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/edit.go
@@ -18,12 +18,12 @@ import (
 
 func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/embeddings.go
@@ -18,12 +18,12 @@ import (
 // https://platform.openai.com/docs/api-reference/embeddings
 func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		model, input, err := readInput(c, o, true)
+		model, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/image.go
@@ -61,7 +61,7 @@ func downloadFile(url string) (string, error) {
 */
 func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
 		}
 		log.Debug().Msgf("Loading model: %+v", m)
 
-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/request.go
@@ -7,11 +7,10 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
-	"os"
-	"path/filepath"
 	"strings"
 
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
 	options "github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/api/schema"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -19,8 +18,7 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
-	loader := o.Loader
+func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) {
 	input := new(schema.OpenAIRequest)
 	ctx, cancel := context.WithCancel(o.Context)
 	input.Context = ctx
@@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
 		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
 	}
 
-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
 	received, _ := json.Marshal(input)
 
 	log.Debug().Msgf("Request received: %s", string(received))
 
-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
-
-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists && randomModel {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return "", nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-	return modelFile, input, nil
+	modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel)
+
+	return modelFile, input, err
 }
 
 // this function check if the string is an URL, if it's an URL downloads the image in memory
@@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) {
 	return "", fmt.Errorf("not valid string")
 }
 
-func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
+func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
@@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 	}
 }
 
-func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-
-	var cfg *config.Config
-
-	defaults := func() {
-		cfg = config.DefaultConfig(modelFile)
-		cfg.ContextSize = ctx
-		cfg.Threads = threads
-		cfg.F16 = f16
-		cfg.Debug = debug
-	}
-
-	cfgExisting, exists := cm.GetConfig(modelFile)
-	if !exists {
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
-				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cm.GetConfig(modelFile)
-			if exists {
-				cfg = &cfgExisting
-			} else {
-				defaults()
-			}
-		} else {
-			defaults()
-		}
-	} else {
-		cfg = &cfgExisting
-	}
+func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
+	cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)
 
 	// Set the parameters for the language model prediction
-	updateConfig(cfg, input)
+	updateRequestConfig(cfg, input)
 
-	// Don't allow 0 as setting
-	if cfg.Threads == 0 {
-		if threads != 0 {
-			cfg.Threads = threads
-		} else {
-			cfg.Threads = 4
-		}
-	}
-
-	// Enforce debug flag if passed from CLI
-	if debug {
-		cfg.Debug = true
-	}
-
-	return cfg, input, nil
+	return cfg, input, err
 }
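After this refactor every OpenAI-compatible endpoint runs the same two-step pipeline: readRequest parses the body and resolves the model, then mergeRequestWithConfig loads the per-model config via config.Load and overlays request fields on top of it. A toy sketch of that overlay step (stand-in types, only two fields shown; the real updateRequestConfig handles many more):

```go
package main

import "fmt"

// Config and Request stand in for config.Config and schema.OpenAIRequest.
type Config struct {
	Model       string
	Temperature float64
	Echo        bool
}

type Request struct {
	Model       string
	Temperature float64
	Echo        bool
}

// mergeRequestWithConfig overlays non-zero request fields onto the loaded
// per-model config, so the request wins where it says something explicit.
func mergeRequestWithConfig(cfg Config, req Request) Config {
	if req.Temperature != 0 {
		cfg.Temperature = req.Temperature
	}
	if req.Echo {
		cfg.Echo = req.Echo
	}
	return cfg
}

func main() {
	cfg := Config{Model: "phi-2", Temperature: 0.7} // what config.Load resolved
	merged := mergeRequestWithConfig(cfg, Request{Temperature: 0.2, Echo: true})
	fmt.Printf("%+v\n", merged) // Temperature 0.2 and Echo true override the YAML
}
```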
api/openai/transcription.go
@@ -19,12 +19,12 @@ import (
 // https://platform.openai.com/docs/api-reference/audio/create
 func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
backend/python/vall-e-x/install.sh
@@ -12,6 +12,9 @@ echo $CONDA_PREFIX
 
 git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd
 
+# Pin some dependencies (the upstream requirements are too loose)
+pip install torchaudio==2.2.0
+
 cp -rfv $CONDA_PREFIX/vall-e-x/* ./
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
@@ -112,14 +112,24 @@ llama_init_from_file: kv self size = 512.00 MB
 
 ## Intel acceleration (sycl)
 
-#### Requirements
+### Requirements
 
-Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html)
+If building from source, you need to install the [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) and have the Intel drivers available in the system.
+
+### Container images
 
 To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
 
+#### Example
+
+To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
+
+```bash
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+```
+
 ### Notes
 
 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
@@ -128,3 +138,4 @@ In addition to the commands to run LocalAI normally, you need to specify `--devi
 docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
 ```
+
+Note also that sycl has a known issue where it hangs with `mmap: true`; you have to disable it in the model configuration if it is explicitly enabled.
examples/kubernetes/deployment-intel-arc.yaml (new file, 68 lines)
@@ -0,0 +1,68 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+            - phi-2
+          env:
+            - name: DEBUG
+              value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core
+          imagePullPolicy: Always
+          resources:
+            limits:
+              gpu.intel.com/i915: 1
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
examples/kubernetes/deployment.yaml (new file, 65 lines)
@@ -0,0 +1,65 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+            - phi-2
+          env:
+            - name: DEBUG
+              value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
main.go
@@ -404,7 +404,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 
 		defer opts.Loader.StopAllGRPC()
 
-		filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts)
+		filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{})
 		if err != nil {
 			return err
 		}