feat(tts): respect YAMLs config file, add sycl docs/examples (#1692)

* feat(refactor): refactor config and input reading * feat(tts): read config file for TTS * examples(kubernetes): Add simple deployment example * examples(kubernetes): Add simple deployment for intel arc * docs(sycl): add sycl example * feat(tts): do not always pick a first model * fixups to run vall-e-x on container * Correctly resolve backend
2025-05-31 22:40:45 +00:00 · 2024-02-10 21:37:03 +01:00 · 2024-02-10 21:37:03 +01:00 · 53dbe36f32
commit 53dbe36f32
parent 081bd07fd1
17 changed files with 297 additions and 98 deletions
--- a/2
+++ b/2
@ -39,7 +39,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    rm -f cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}  && apt-get clean \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION}  && apt-get clean \
    ; fi

 ENV PATH /usr/local/cuda/bin:${PATH}
--- a/api/backend/tts.go
+++ b/api/backend/tts.go
@ -7,6 +7,7 @@ import (
 	"path/filepath"

 	api_config "github.com/go-skynet/LocalAI/api/config"
+	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }

-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
 	}
+
+	grpcOpts := gRPCModelOpts(c)
+
 	opts := modelOpts(api_config.Config{}, o, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 		model.WithAssetDir(o.AssetsDestination),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	piperModel, err := o.Loader.BackendLoader(opts...)
 	if err != nil {
--- a/api/config/config.go
+++ b/api/config/config.go
@ -183,6 +183,60 @@ func (c *Config) FunctionToCall() string {
 	return c.functionCallNameString
 }

+// Load a config file for a model
+func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(modelPath, modelName+".yaml")
+
+	var cfg *Config
+
+	defaults := func() {
+		cfg = DefaultConfig(modelName)
+		cfg.ContextSize = ctx
+		cfg.Threads = threads
+		cfg.F16 = f16
+		cfg.Debug = debug
+	}
+
+	cfgExisting, exists := cm.GetConfig(modelName)
+	if !exists {
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cm.LoadConfig(modelConfig); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cm.GetConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			} else {
+				defaults()
+			}
+		} else {
+			defaults()
+		}
+	} else {
+		cfg = &cfgExisting
+	}
+
+	// Set the parameters for the language model prediction
+	//updateConfig(cfg, input)
+
+	// Don't allow 0 as setting
+	if cfg.Threads == 0 {
+		if threads != 0 {
+			cfg.Threads = threads
+		} else {
+			cfg.Threads = 4
+		}
+	}
+
+	// Enforce debug flag if passed from CLI
+	if debug {
+		cfg.Debug = true
+	}
+
+	return cfg, nil
+}
+
 func defaultPredictOptions(modelFile string) PredictionOptions {
 	return PredictionOptions{
 		TopP:        0.7,
--- a/api/ctx/fiber.go
+++ b/api/ctx/fiber.go
@ -0,0 +1,43 @@
+package fiberContext
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// ModelFromContext returns the model from the context
+// If no model is specified, it will take the first available
+// Takes a model string as input which should be the one received from the user request.
+// It returns the model name resolved from the context and an error if any.
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+	if ctx.Params("model") != "" {
+		modelInput = ctx.Params("model")
+	}
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelInput == "" && !bearerExists && firstModel {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelInput = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelInput)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return "", fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelInput = bearer
+	}
+	return modelInput, nil
+}
--- a/api/localai/localai.go
+++ b/api/localai/localai.go
@ -3,6 +3,8 @@ package localai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
+	"github.com/rs/zerolog/log"

 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {

 		input := new(TTSRequest)
+
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}

-		filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
+		modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+		cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+		log.Debug().Msgf("Request for model: %s", modelFile)
+
+		if input.Backend != "" {
+			cfg.Backend = input.Input
+		}
+
+		filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg)
 		if err != nil {
 			return err
 		}
--- a/api/openai/chat.go
+++ b/api/openai/chat.go
@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 		processFunctions := false
 		funcs := grammar.Functions{}
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/api/openai/completion.go
+++ b/api/openai/completion.go
@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
 	}

 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

 		log.Debug().Msgf("`input`: %+v", input)

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/api/openai/edit.go
+++ b/api/openai/edit.go
@ -18,12 +18,12 @@ import (

 func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/api/openai/embeddings.go
+++ b/api/openai/embeddings.go
@ -18,12 +18,12 @@ import (
 // https://platform.openai.com/docs/api-reference/embeddings
 func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		model, input, err := readInput(c, o, true)
+		model, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/api/openai/image.go
+++ b/api/openai/image.go
@ -61,7 +61,7 @@ func downloadFile(url string) (string, error) {
 */
 func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
 		}
 		log.Debug().Msgf("Loading model: %+v", m)

-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/api/openai/request.go
+++ b/api/openai/request.go
@ -7,11 +7,10 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
-	"os"
-	"path/filepath"
 	"strings"

 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
 	options "github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/api/schema"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@ -19,8 +18,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
-	loader := o.Loader
+func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) {
 	input := new(schema.OpenAIRequest)
 	ctx, cancel := context.WithCancel(o.Context)
 	input.Context = ctx
@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
 		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
 	}

-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
 	received, _ := json.Marshal(input)

 	log.Debug().Msgf("Request received: %s", string(received))

-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+	modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel)

-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists && randomModel {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return "", nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-	return modelFile, input, nil
+	return modelFile, input, err
 }

 // this function check if the string is an URL, if it's an URL downloads the image in memory
@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) {
 	return "", fmt.Errorf("not valid string")
 }

-func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
+func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 	}
 }

-func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-
-	var cfg *config.Config
-
-	defaults := func() {
-		cfg = config.DefaultConfig(modelFile)
-		cfg.ContextSize = ctx
-		cfg.Threads = threads
-		cfg.F16 = f16
-		cfg.Debug = debug
-	}
-
-	cfgExisting, exists := cm.GetConfig(modelFile)
-	if !exists {
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
-				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cm.GetConfig(modelFile)
-			if exists {
-				cfg = &cfgExisting
-			} else {
-				defaults()
-			}
-		} else {
-			defaults()
-		}
-	} else {
-		cfg = &cfgExisting
-	}
+func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
+	cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)

 	// Set the parameters for the language model prediction
-	updateConfig(cfg, input)
+	updateRequestConfig(cfg, input)

-	// Don't allow 0 as setting
-	if cfg.Threads == 0 {
-		if threads != 0 {
-			cfg.Threads = threads
-		} else {
-			cfg.Threads = 4
-		}
-	}
-
-	// Enforce debug flag if passed from CLI
-	if debug {
-		cfg.Debug = true
-	}
-
-	return cfg, input, nil
+	return cfg, input, err
 }
--- a/api/openai/transcription.go
+++ b/api/openai/transcription.go
@ -19,12 +19,12 @@ import (
 // https://platform.openai.com/docs/api-reference/audio/create
 func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
--- a/backend/python/vall-e-x/install.sh
+++ b/backend/python/vall-e-x/install.sh
@ -12,6 +12,9 @@ echo $CONDA_PREFIX

 git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd

+# Pin some dependencies (the upstream requirements is too much loose)
+pip install torchaudio==2.2.0
+
 cp -rfv $CONDA_PREFIX/vall-e-x/* ./

 if [ "$PIP_CACHE_PURGE" = true ] ; then
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@ -112,14 +112,24 @@ llama_init_from_file: kv self size  =  512.00 MB

 ## Intel acceleration (sycl)

-#### Requirements
+### Requirements

-Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html)
+If building from source, you need to install [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) and have the Intel drivers available in the system.
+
+### Container images

 To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...

 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).

+#### Example
+
+To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
+
+```bash
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080  -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+```
+
 ### Notes

 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
@ -128,3 +138,4 @@ In addition to the commands to run LocalAI normally, you need to specify `--devi
 docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
 ```

+Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
--- a/examples/kubernetes/deployment-intel-arc.yaml
+++ b/examples/kubernetes/deployment-intel-arc.yaml
@ -0,0 +1,68 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+          - phi-2
+          env:
+          - name: DEBUG
+            value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core
+          imagePullPolicy: Always
+          resources:
+            limits:
+              gpu.intel.com/i915: 1
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
--- a/examples/kubernetes/deployment.yaml
+++ b/examples/kubernetes/deployment.yaml
@ -0,0 +1,65 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+          - phi-2
+          env:
+          - name: DEBUG
+            value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
--- a/main.go
+++ b/main.go
@ -404,7 +404,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit

 					defer opts.Loader.StopAllGRPC()

-					filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts)
+					filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{})
 					if err != nil {
 						return err
 					}