diff --git a/Dockerfile b/Dockerfile
index 5cee6a23..6c5e2745 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -39,7 +39,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     dpkg -i cuda-keyring_1.1-1_all.deb && \
     rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi

 ENV PATH /usr/local/cuda/bin:${PATH}
diff --git a/api/backend/tts.go b/api/backend/tts.go
index ae8f53ee..6e5ffcc0 100644
--- a/api/backend/tts.go
+++ b/api/backend/tts.go
@@ -7,6 +7,7 @@ import (
 	"path/filepath"

 	api_config "github.com/go-skynet/LocalAI/api/config"
+	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }

-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
 	}
+
+	grpcOpts := gRPCModelOpts(c)
+
 	opts := modelOpts(api_config.Config{}, o, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 		model.WithAssetDir(o.AssetsDestination),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	piperModel, err := o.Loader.BackendLoader(opts...)
 	if err != nil {
diff --git a/api/config/config.go b/api/config/config.go
index 1b27b574..48d1b791 100644
--- a/api/config/config.go
+++ b/api/config/config.go
@@ -183,6 +183,60 @@ func (c *Config) FunctionToCall() string {
 	return c.functionCallNameString
 }

+// Load a config file for a model
+func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(modelPath, modelName+".yaml")
+
+	var cfg *Config
+
+	defaults := func() {
+		cfg = DefaultConfig(modelName)
+		cfg.ContextSize = ctx
+		cfg.Threads = threads
+		cfg.F16 = f16
+		cfg.Debug = debug
+	}
+
+	cfgExisting, exists := cm.GetConfig(modelName)
+	if !exists {
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cm.LoadConfig(modelConfig); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cm.GetConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			} else {
+				defaults()
+			}
+		} else {
+			defaults()
+		}
+	} else {
+		cfg = &cfgExisting
+	}
+
+	// Set the parameters for the language model prediction
+	//updateConfig(cfg, input)
+
+	// Don't allow 0 as setting
+	if cfg.Threads == 0 {
+		if threads != 0 {
+			cfg.Threads = threads
+		} else {
+			cfg.Threads = 4
+		}
+	}
+
+	// Enforce debug flag if passed from CLI
+	if debug {
+		cfg.Debug = true
+	}
+
+	return cfg, nil
+}
+
 func defaultPredictOptions(modelFile string) PredictionOptions {
 	return PredictionOptions{
 		TopP: 0.7,
diff --git a/api/ctx/fiber.go b/api/ctx/fiber.go
new file mode 100644
index 00000000..ffb63111
--- /dev/null
+++ b/api/ctx/fiber.go
@@ -0,0 +1,43 @@
+package fiberContext
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// ModelFromContext returns the model from the context
+// If no model is specified, it will take the first available
+// Takes a model string as input which should be the one received from the user request.
+// It returns the model name resolved from the context and an error if any.
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+	if ctx.Params("model") != "" {
+		modelInput = ctx.Params("model")
+	}
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelInput == "" && !bearerExists && firstModel {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelInput = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelInput)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return "", fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in the bearer token, it takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelInput = bearer
+	}
+	return modelInput, nil
+}
diff --git a/api/localai/localai.go b/api/localai/localai.go
index c9aee2ae..7774ca47 100644
--- a/api/localai/localai.go
+++ b/api/localai/localai.go
@@ -3,6 +3,8 @@ package localai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
+	"github.com/rs/zerolog/log"

 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
@@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 		input := new(TTSRequest)

+		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}

-		filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
+		modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+		cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+		log.Debug().Msgf("Request for model: %s", modelFile)
+
+		if input.Backend != "" {
+			cfg.Backend = input.Backend
+		}
+
+		filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg)
 		if err != nil {
 			return err
 		}
diff --git a/api/openai/chat.go b/api/openai/chat.go
index 02bf6149..819cd6b2 100644
--- a/api/openai/chat.go
+++ b/api/openai/chat.go
@@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 		processFunctions := false
 		funcs := grammar.Functions{}
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/api/openai/completion.go b/api/openai/completion.go
index c0607632..b098451d 100644
--- a/api/openai/completion.go
+++ b/api/openai/completion.go
@@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
 	}

 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

 		log.Debug().Msgf("`input`: %+v", input)

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/api/openai/edit.go b/api/openai/edit.go
index 888b9db7..16679ae5 100644
--- a/api/openai/edit.go
+++ b/api/openai/edit.go
@@ -18,12 +18,12 @@ import (

 func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/api/openai/embeddings.go b/api/openai/embeddings.go
index 15e31e92..44feb373 100644
--- a/api/openai/embeddings.go
+++ b/api/openai/embeddings.go
@@ -18,12 +18,12 @@ import (
 // https://platform.openai.com/docs/api-reference/embeddings
 func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		model, input, err := readInput(c, o, true)
+		model, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/api/openai/image.go b/api/openai/image.go
index 3e4bc349..07f028f0 100644
--- a/api/openai/image.go
+++ b/api/openai/image.go
@@ -61,7 +61,7 @@ func downloadFile(url string) (string, error) {
 */
 func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
 		}

 		log.Debug().Msgf("Loading model: %+v", m)
-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/api/openai/request.go b/api/openai/request.go
index cc15fe40..382a930e 100644
--- a/api/openai/request.go
+++ b/api/openai/request.go
@@ -7,11 +7,10 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
-	"os"
-	"path/filepath"
 	"strings"

 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
 	options "github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/api/schema"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -19,8 +18,7 @@ import (
 	"github.com/rs/zerolog/log"
 )

-func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
-	loader := o.Loader
+func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) {
 	input := new(schema.OpenAIRequest)
 	ctx, cancel := context.WithCancel(o.Context)
 	input.Context = ctx
@@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
 		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
 	}

-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
 	received, _ := json.Marshal(input)
 	log.Debug().Msgf("Request received: %s", string(received))

-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+	modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel)

-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists && randomModel {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return "", nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-	return modelFile, input, nil
+	return modelFile, input, err
 }

 // this function check if the string is an URL, if it's an URL downloads the image in memory
@@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) {
 	return "", fmt.Errorf("not valid string")
 }

-func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
+func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
@@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 	}
 }

-func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-
-	var cfg *config.Config
-
-	defaults := func() {
-		cfg = config.DefaultConfig(modelFile)
-		cfg.ContextSize = ctx
-		cfg.Threads = threads
-		cfg.F16 = f16
-		cfg.Debug = debug
-	}
-
-	cfgExisting, exists := cm.GetConfig(modelFile)
-	if !exists {
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
-				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cm.GetConfig(modelFile)
-			if exists {
-				cfg = &cfgExisting
-			} else {
-				defaults()
-			}
-		} else {
-			defaults()
-		}
-	} else {
-		cfg = &cfgExisting
-	}
+func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
+	cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)

 	// Set the parameters for the language model prediction
-	updateConfig(cfg, input)
+	updateRequestConfig(cfg, input)

-	// Don't allow 0 as setting
-	if cfg.Threads == 0 {
-		if threads != 0 {
-			cfg.Threads = threads
-		} else {
-			cfg.Threads = 4
-		}
-	}
-
-	// Enforce debug flag if passed from CLI
-	if debug {
-		cfg.Debug = true
-	}
-
-	return cfg, input, nil
+	return cfg, input, err
 }
diff --git a/api/openai/transcription.go b/api/openai/transcription.go
index 895c110f..668a2069 100644
--- a/api/openai/transcription.go
+++ b/api/openai/transcription.go
@@ -19,12 +19,12 @@ import (
 // https://platform.openai.com/docs/api-reference/audio/create
 func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}

-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
diff --git a/backend/python/vall-e-x/install.sh b/backend/python/vall-e-x/install.sh
index 2fe29d19..653eab7f 100644
--- a/backend/python/vall-e-x/install.sh
+++ b/backend/python/vall-e-x/install.sh
@@ -12,6 +12,9 @@ echo $CONDA_PREFIX

 git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd

+# Pin some dependencies (the upstream requirements file is too loose)
+pip install torchaudio==2.2.0
+
 cp -rfv $CONDA_PREFIX/vall-e-x/* ./

 if [ "$PIP_CACHE_PURGE" = true ] ; then
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index 9688e787..aa931f07 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -112,14 +112,24 @@ llama_init_from_file: kv self size = 512.00 MB

 ## Intel acceleration (sycl)

-#### Requirements
+### Requirements

-Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html)
+If building from source, you need to install the [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) and have the Intel drivers available on the system.
+
+### Container images

 To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...

 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).

+#### Example
+
+To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
+
+```bash
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+```
+
 ### Notes

 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
@@ -128,3 +138,4 @@ In addition to the commands to run LocalAI normally, you need to specify `--devi
 ```bash
 docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
 ```
+Note also that sycl has a known issue where it hangs with `mmap: true`. You have to disable it in the model configuration if it is explicitly enabled.
diff --git a/examples/kubernetes/deployment-intel-arc.yaml b/examples/kubernetes/deployment-intel-arc.yaml
new file mode 100644
index 00000000..f77182bd
--- /dev/null
+++ b/examples/kubernetes/deployment-intel-arc.yaml
@@ -0,0 +1,68 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+  - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+          - phi-2
+          env:
+          - name: DEBUG
+            value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core
+          imagePullPolicy: Always
+          resources:
+            limits:
+              gpu.intel.com/i915: 1
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+  - protocol: TCP
+    port: 8080
+    targetPort: 8080
\ No newline at end of file
diff --git a/examples/kubernetes/deployment.yaml b/examples/kubernetes/deployment.yaml
new file mode 100644
index 00000000..601fffdb
--- /dev/null
+++ b/examples/kubernetes/deployment.yaml
@@ -0,0 +1,65 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+  - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+          - phi-2
+          env:
+          - name: DEBUG
+            value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+  - protocol: TCP
+    port: 8080
+    targetPort: 8080
\ No newline at end of file
diff --git a/main.go b/main.go
index d2209285..edf70328 100644
--- a/main.go
+++ b/main.go
@@ -404,7 +404,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit

 			defer opts.Loader.StopAllGRPC()

-			filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts)
+			filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{})
 			if err != nil {
 				return err
 			}