Mirror of https://github.com/mudler/LocalAI.git (synced 2025-03-11 06:54:11 +00:00)
feat(tts): respect YAML config file, add sycl docs/examples (#1692)

* feat(refactor): refactor config and input reading
* feat(tts): read the config file for TTS
* examples(kubernetes): add a simple deployment example
* examples(kubernetes): add a simple deployment for Intel Arc
* docs(sycl): add a sycl example
* feat(tts): do not always pick the first model
* fixups to run vall-e-x in the container
* correctly resolve the backend
parent: 081bd07fd1
commit: 53dbe36f32
Dockerfile
@@ -39,7 +39,7 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     dpkg -i cuda-keyring_1.1-1_all.deb && \
     rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
-    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
+    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
 
 ENV PATH /usr/local/cuda/bin:${PATH}
api/backend/tts.go
@@ -7,6 +7,7 @@ import (
 	"path/filepath"
 
 	api_config "github.com/go-skynet/LocalAI/api/config"
+	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -29,16 +30,20 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }
 
-func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
+func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option, c config.Config) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend
 	}
 
+	grpcOpts := gRPCModelOpts(c)
+
 	opts := modelOpts(api_config.Config{}, o, []model.Option{
 		model.WithBackendString(bb),
 		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 		model.WithAssetDir(o.AssetsDestination),
+		model.WithLoadGRPCLoadModelOpts(grpcOpts),
 	})
 	piperModel, err := o.Loader.BackendLoader(opts...)
 	if err != nil {
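The effect of the ModelTTS change is easier to see outside the diff: the request backend still falls back to piper, but the model's config now contributes load-time options instead of being ignored. A rough, stdlib-only sketch with simplified stand-in types (not LocalAI's types; `buildTTSOpts` plays the role `gRPCModelOpts` plays above):

```go
package main

import "fmt"

// ttsConfig and loadModelOpts are simplified stand-ins for config.Config and
// the gRPC LoadModel options in pkg/grpc/proto.
type ttsConfig struct {
	ContextSize int
	Threads     int
}

type loadModelOpts struct {
	Backend     string
	Model       string
	ContextSize int32
	Threads     int32
}

const piperBackend = "piper" // mirrors model.PiperBackend

// buildTTSOpts mirrors the new ModelTTS flow: default the backend to piper,
// then fold the per-model config into the options the backend receives at
// load time.
func buildTTSOpts(backend, modelFile string, c ttsConfig) loadModelOpts {
	bb := backend
	if bb == "" {
		bb = piperBackend
	}
	return loadModelOpts{
		Backend:     bb,
		Model:       modelFile,
		ContextSize: int32(c.ContextSize),
		Threads:     int32(c.Threads),
	}
}

func main() {
	// An empty backend string still picks piper, now with the YAML config applied.
	fmt.Printf("%+v\n", buildTTSOpts("", "en-us-kathleen-low.onnx", ttsConfig{ContextSize: 512, Threads: 4}))
}
```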
api/config/config.go
@@ -183,6 +183,60 @@ func (c *Config) FunctionToCall() string {
 	return c.functionCallNameString
 }
 
+// Load a config file for a model
+func Load(modelName, modelPath string, cm *ConfigLoader, debug bool, threads, ctx int, f16 bool) (*Config, error) {
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(modelPath, modelName+".yaml")
+
+	var cfg *Config
+
+	defaults := func() {
+		cfg = DefaultConfig(modelName)
+		cfg.ContextSize = ctx
+		cfg.Threads = threads
+		cfg.F16 = f16
+		cfg.Debug = debug
+	}
+
+	cfgExisting, exists := cm.GetConfig(modelName)
+	if !exists {
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cm.LoadConfig(modelConfig); err != nil {
+				return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+			cfgExisting, exists = cm.GetConfig(modelName)
+			if exists {
+				cfg = &cfgExisting
+			} else {
+				defaults()
+			}
+		} else {
+			defaults()
+		}
+	} else {
+		cfg = &cfgExisting
+	}
+
+	// Set the parameters for the language model prediction
+	//updateConfig(cfg, input)
+
+	// Don't allow 0 as setting
+	if cfg.Threads == 0 {
+		if threads != 0 {
+			cfg.Threads = threads
+		} else {
+			cfg.Threads = 4
+		}
+	}
+
+	// Enforce debug flag if passed from CLI
+	if debug {
+		cfg.Debug = true
+	}
+
+	return cfg, nil
+}
+
 func defaultPredictOptions(modelFile string) PredictionOptions {
 	return PredictionOptions{
 		TopP: 0.7,
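Since `Load` silently falls back to defaults when no YAML is found, callers always get a usable config. A condensed, runnable sketch of the precedence it implements (registered config first, then a sidecar `<model>.yaml`, then CLI defaults) with simplified stand-in types and the YAML parsing elided; this is not the LocalAI API:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// modelConfig is a simplified stand-in for config.Config.
type modelConfig struct {
	Name        string
	ContextSize int
	Threads     int
	F16         bool
	Debug       bool
}

// registry stands in for ConfigLoader's in-memory set of known configs.
var registry = map[string]modelConfig{}

func load(modelName, modelPath string, debug bool, threads, ctx int, f16 bool) (*modelConfig, error) {
	// 1. An already-registered config always wins.
	if cfg, ok := registry[modelName]; ok {
		return &cfg, nil
	}

	// 2. Otherwise start from CLI-provided defaults...
	cfg := &modelConfig{Name: modelName, ContextSize: ctx, Threads: threads, F16: f16, Debug: debug}

	// ...and pick up a sidecar "<model>.yaml" if one sits next to the model.
	yamlPath := filepath.Join(modelPath, modelName+".yaml")
	if _, err := os.Stat(yamlPath); err == nil {
		fmt.Println("would parse", yamlPath) // parsing elided; LocalAI delegates it to ConfigLoader.LoadConfig
	}

	// 3. Never run with 0 threads: fall back to the CLI value, then to 4.
	if cfg.Threads == 0 {
		if threads != 0 {
			cfg.Threads = threads
		} else {
			cfg.Threads = 4
		}
	}

	// 4. A debug flag passed on the CLI is always enforced.
	if debug {
		cfg.Debug = true
	}
	return cfg, nil
}

func main() {
	cfg, _ := load("phi-2", "/models", true, 0, 512, false)
	fmt.Printf("%+v\n", *cfg) // Threads floors at 4, Debug stays true
}
```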
api/ctx/fiber.go (new file, 43 lines)
@@ -0,0 +1,43 @@
+package fiberContext
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+)
+
+// ModelFromContext returns the model from the context
+// If no model is specified, it will take the first available
+// Takes a model string as input which should be the one received from the user request.
+// It returns the model name resolved from the context and an error if any.
+func ModelFromContext(ctx *fiber.Ctx, loader *model.ModelLoader, modelInput string, firstModel bool) (string, error) {
+	if ctx.Params("model") != "" {
+		modelInput = ctx.Params("model")
+	}
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelInput == "" && !bearerExists && firstModel {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelInput = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelInput)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return "", fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelInput = bearer
+	}
+	return modelInput, nil
+}
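Stripped of fiber and the model loader, the resolution order ModelFromContext implements is: URL path parameter over request body, then an optional fall-back to the first installed model, with a bearer token naming an installed model trumping everything. A self-contained sketch of that precedence, with stand-in inputs instead of the LocalAI types:

```go
package main

import (
	"errors"
	"fmt"
)

// resolveModel mirrors ModelFromContext's precedence rules using plain values
// instead of *fiber.Ctx and *model.ModelLoader.
func resolveModel(pathParam, bearer, bodyModel string, installed []string, firstModel bool) (string, error) {
	model := bodyModel
	if pathParam != "" {
		model = pathParam // an explicit path segment wins over the request body
	}

	// A bearer token only counts if it names an installed model.
	bearerExists := false
	for _, m := range installed {
		if bearer != "" && m == bearer {
			bearerExists = true
		}
	}

	// No model anywhere: optionally fall back to the first installed one.
	if model == "" && !bearerExists && firstModel {
		if len(installed) == 0 {
			return "", errors.New("no model specified")
		}
		model = installed[0]
	}

	// A matching bearer token takes precedence over everything else.
	if bearerExists {
		model = bearer
	}
	return model, nil
}

func main() {
	m, err := resolveModel("", "my-private-model", "phi-2", []string{"phi-2", "my-private-model"}, true)
	fmt.Println(m, err) // "my-private-model", the bearer token wins
}
```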
api/localai/localai.go
@@ -3,6 +3,8 @@ package localai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
+	"github.com/rs/zerolog/log"
 
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
@@ -18,12 +20,31 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 
 		input := new(TTSRequest)
 
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
 
-		filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
+		modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		}
+
+		cfg, err := config.Load(modelFile, o.Loader.ModelPath, cm, false, 0, 0, false)
+		if err != nil {
+			modelFile = input.Model
+			log.Warn().Msgf("Model not found in context: %s", input.Model)
+		} else {
+			modelFile = cfg.Model
+		}
+
+		log.Debug().Msgf("Request for model: %s", modelFile)
+
+		if input.Backend != "" {
+			cfg.Backend = input.Backend
+		}
+
+		filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, o.Loader, o, *cfg)
 		if err != nil {
 			return err
 		}
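With the endpoint wired up this way, a TTS request that names only a model now picks up that model's YAML settings server-side. A minimal client sketch; the JSON field names mirror how TTSRequest is used above, and the address and voice file are assumptions:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Model and backend resolution now happen server-side from the model's YAML config.
	body := []byte(`{"model": "en-us-kathleen-low.onnx", "input": "Hello from LocalAI"}`)

	resp, err := http.Post("http://localhost:8080/tts", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	audio, _ := io.ReadAll(resp.Body) // the endpoint returns the generated audio file
	fmt.Printf("status %s, %d bytes of audio\n", resp.Status, len(audio))
}
```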
api/openai/chat.go
@@ -58,12 +58,12 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
 	return func(c *fiber.Ctx) error {
 		processFunctions := false
 		funcs := grammar.Functions{}
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/completion.go
@@ -53,14 +53,14 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
 	}
 
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
 		log.Debug().Msgf("`input`: %+v", input)
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/edit.go
@@ -18,12 +18,12 @@ import (
 
 func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		modelFile, input, err := readInput(c, o, true)
+		modelFile, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(modelFile, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/embeddings.go
@@ -18,12 +18,12 @@ import (
 // https://platform.openai.com/docs/api-reference/embeddings
 func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		model, input, err := readInput(c, o, true)
+		model, input, err := readRequest(c, o, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/image.go
@@ -61,7 +61,7 @@ func downloadFile(url string) (string, error) {
 */
 func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
@@ -71,7 +71,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
 		}
 		log.Debug().Msgf("Loading model: %+v", m)
 
-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
api/openai/request.go
@@ -7,11 +7,10 @@ import (
 	"fmt"
 	"io/ioutil"
 	"net/http"
-	"os"
-	"path/filepath"
 	"strings"
 
 	config "github.com/go-skynet/LocalAI/api/config"
+	fiberContext "github.com/go-skynet/LocalAI/api/ctx"
 	options "github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/api/schema"
 	model "github.com/go-skynet/LocalAI/pkg/model"
@@ -19,8 +18,7 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
-	loader := o.Loader
+func readRequest(c *fiber.Ctx, o *options.Option, firstModel bool) (string, *schema.OpenAIRequest, error) {
 	input := new(schema.OpenAIRequest)
 	ctx, cancel := context.WithCancel(o.Context)
 	input.Context = ctx
@@ -30,38 +28,13 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *sche
 		return "", nil, fmt.Errorf("failed parsing request body: %w", err)
 	}
 
-	modelFile := input.Model
-
-	if c.Params("model") != "" {
-		modelFile = c.Params("model")
-	}
-
 	received, _ := json.Marshal(input)
 
 	log.Debug().Msgf("Request received: %s", string(received))
 
-	// Set model from bearer token, if available
-	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
-
-	// If no model was specified, take the first available
-	if modelFile == "" && !bearerExists && randomModel {
-		models, _ := loader.ListModels()
-		if len(models) > 0 {
-			modelFile = models[0]
-			log.Debug().Msgf("No model specified, using: %s", modelFile)
-		} else {
-			log.Debug().Msgf("No model specified, returning error")
-			return "", nil, fmt.Errorf("no model specified")
-		}
-	}
-
-	// If a model is found in bearer token takes precedence
-	if bearerExists {
-		log.Debug().Msgf("Using model from bearer token: %s", bearer)
-		modelFile = bearer
-	}
-	return modelFile, input, nil
+	modelFile, err := fiberContext.ModelFromContext(c, o.Loader, input.Model, firstModel)
+
+	return modelFile, input, err
 }
 
 // this function check if the string is an URL, if it's an URL downloads the image in memory
@@ -95,7 +68,7 @@ func getBase64Image(s string) (string, error) {
 	return "", fmt.Errorf("not valid string")
 }
 
-func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
+func updateRequestConfig(config *config.Config, input *schema.OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
@@ -282,55 +255,11 @@ func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
 	}
 }
 
-func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
-	// Load a config file if present after the model name
-	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
-
-	var cfg *config.Config
-
-	defaults := func() {
-		cfg = config.DefaultConfig(modelFile)
-		cfg.ContextSize = ctx
-		cfg.Threads = threads
-		cfg.F16 = f16
-		cfg.Debug = debug
-	}
-
-	cfgExisting, exists := cm.GetConfig(modelFile)
-	if !exists {
-		if _, err := os.Stat(modelConfig); err == nil {
-			if err := cm.LoadConfig(modelConfig); err != nil {
-				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
-			}
-			cfgExisting, exists = cm.GetConfig(modelFile)
-			if exists {
-				cfg = &cfgExisting
-			} else {
-				defaults()
-			}
-		} else {
-			defaults()
-		}
-	} else {
-		cfg = &cfgExisting
-	}
+func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
+	cfg, err := config.Load(modelFile, loader.ModelPath, cm, debug, threads, ctx, f16)
 
 	// Set the parameters for the language model prediction
-	updateConfig(cfg, input)
+	updateRequestConfig(cfg, input)
 
-	// Don't allow 0 as setting
-	if cfg.Threads == 0 {
-		if threads != 0 {
-			cfg.Threads = threads
-		} else {
-			cfg.Threads = 4
-		}
-	}
-
-	// Enforce debug flag if passed from CLI
-	if debug {
-		cfg.Debug = true
-	}
-
-	return cfg, input, nil
+	return cfg, input, err
 }
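After this refactor every OpenAI-compatible endpoint runs the same two-step pipeline: readRequest parses the body and resolves the model, then mergeRequestWithConfig loads the per-model config via config.Load and overlays request fields on top of it. A toy sketch of that overlay step (stand-in types, only two fields shown; the real updateRequestConfig handles many more):

```go
package main

import "fmt"

// Config and Request stand in for config.Config and schema.OpenAIRequest.
type Config struct {
	Model       string
	Temperature float64
	Echo        bool
}

type Request struct {
	Model       string
	Temperature float64
	Echo        bool
}

// mergeRequestWithConfig overlays non-zero request fields onto the loaded
// per-model config, so the request wins where it says something explicit.
func mergeRequestWithConfig(cfg Config, req Request) Config {
	if req.Temperature != 0 {
		cfg.Temperature = req.Temperature
	}
	if req.Echo {
		cfg.Echo = req.Echo
	}
	return cfg
}

func main() {
	cfg := Config{Model: "phi-2", Temperature: 0.7} // what config.Load resolved
	merged := mergeRequestWithConfig(cfg, Request{Temperature: 0.2, Echo: true})
	fmt.Printf("%+v\n", merged) // Temperature 0.2 and Echo true override the YAML
}
```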
api/openai/transcription.go
@@ -19,12 +19,12 @@ import (
 // https://platform.openai.com/docs/api-reference/audio/create
 func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		m, input, err := readInput(c, o, false)
+		m, input, err := readRequest(c, o, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 
-		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
+		config, input, err := mergeRequestWithConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
backend/python/vall-e-x/install.sh
@@ -12,6 +12,9 @@ echo $CONDA_PREFIX
 
 git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd
 
+# Pin some dependencies (the upstream requirements are too loose)
+pip install torchaudio==2.2.0
+
 cp -rfv $CONDA_PREFIX/vall-e-x/* ./
 
 if [ "$PIP_CACHE_PURGE" = true ] ; then
@@ -112,14 +112,24 @@ llama_init_from_file: kv self size = 512.00 MB
 
 ## Intel acceleration (sycl)
 
-#### Requirements
+### Requirements
 
-Requirement: [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html)
+If building from source, you need to install the [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit/download.html) and have the Intel drivers available in the system.
+
+### Container images
 
 To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
 
+#### Example
+
+To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
+
+```bash
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+```
+
 ### Notes
 
 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
@@ -128,3 +138,4 @@ In addition to the commands to run LocalAI normally, you need to specify `--devi
 docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
 ```
+
+Note also that sycl has a known issue where it hangs with `mmap: true`; you have to disable it in the model configuration if it is explicitly enabled.
examples/kubernetes/deployment-intel-arc.yaml (new file, 68 lines)
@@ -0,0 +1,68 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+            - phi-2
+          env:
+            - name: DEBUG
+              value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core
+          imagePullPolicy: Always
+          resources:
+            limits:
+              gpu.intel.com/i915: 1
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
examples/kubernetes/deployment.yaml (new file, 65 lines)
@@ -0,0 +1,65 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-ai
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: models-pvc
+  namespace: local-ai
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-ai
+  namespace: local-ai
+  labels:
+    app: local-ai
+spec:
+  selector:
+    matchLabels:
+      app: local-ai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: local-ai
+      name: local-ai
+    spec:
+      containers:
+        - args:
+            - phi-2
+          env:
+            - name: DEBUG
+              value: "true"
+          name: local-ai
+          image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: models-volume
+              mountPath: /build/models
+      volumes:
+        - name: models-volume
+          persistentVolumeClaim:
+            claimName: models-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: local-ai
+  namespace: local-ai
+spec:
+  selector:
+    app: local-ai
+  type: LoadBalancer
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
main.go
@@ -404,7 +404,7 @@ For a list of compatible model, check out: https://localai.io/model-compatibilit
 
 		defer opts.Loader.StopAllGRPC()
 
-		filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts)
+		filePath, _, err := backend.ModelTTS(backendOption, text, modelOption, opts.Loader, opts, config.Config{})
 		if err != nil {
 			return err
 		}