feat: auto load into memory on startup (#3627)

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
Author: Sertaç Özercan (committed by GitHub)
Date: 2024-09-22 01:03:30 -07:00
Parent: 1f43678d53
Commit: ee21b00a8d
10 changed files with 259 additions and 213 deletions

View File

@@ -12,7 +12,7 @@ import (
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
 	modelFile := backendConfig.Model
-	grpcOpts := gRPCModelOpts(backendConfig)
+	grpcOpts := GRPCModelOpts(backendConfig)
 	var inferenceModel interface{}
 	var err error

View File

@@ -12,7 +12,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 	if *threads == 0 && appConfig.Threads != 0 {
 		threads = &appConfig.Threads
 	}
-	gRPCOpts := gRPCModelOpts(backendConfig)
+	gRPCOpts := GRPCModelOpts(backendConfig)
 	opts := modelOpts(backendConfig, appConfig, []model.Option{
 		model.WithBackendString(backendConfig.Backend),
 		model.WithAssetDir(appConfig.AssetsDestination),

View File

@@ -37,7 +37,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	if *threads == 0 && o.Threads != 0 {
 		threads = &o.Threads
 	}
-	grpcOpts := gRPCModelOpts(c)
+	grpcOpts := GRPCModelOpts(c)
 	var inferenceModel grpc.Backend
 	var err error

View File

@@ -44,7 +44,7 @@ func getSeed(c config.BackendConfig) int32 {
 	return seed
 }
 
-func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
+func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 	b := 512
 	if c.Batch != 0 {
 		b = c.Batch

View File

@@ -15,7 +15,7 @@ func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *mod
 		return nil, fmt.Errorf("backend is required")
 	}
-	grpcOpts := gRPCModelOpts(backendConfig)
+	grpcOpts := GRPCModelOpts(backendConfig)
 	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(bb),

View File

@@ -29,7 +29,7 @@ func SoundGeneration(
 		return "", nil, fmt.Errorf("backend is a required parameter")
 	}
-	grpcOpts := gRPCModelOpts(backendConfig)
+	grpcOpts := GRPCModelOpts(backendConfig)
 	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(backend),
 		model.WithModel(modelFile),

View File

@@ -28,7 +28,7 @@ func ModelTTS(
 		bb = model.PiperBackend
 	}
-	grpcOpts := gRPCModelOpts(backendConfig)
+	grpcOpts := GRPCModelOpts(backendConfig)
 	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
 		model.WithBackendString(bb),

View File

@@ -69,6 +69,7 @@ type RunCMD struct {
 	WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
 	Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
 	DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
+	LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
 }
 
 func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -104,6 +105,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
 		config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
 		config.WithP2PNetworkID(r.Peer2PeerNetworkID),
+		config.WithLoadToMemory(r.LoadToMemory),
 	}
 
 	token := ""

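For context, here is a standalone sketch (not LocalAI code) of how a kong field declared with the same tags resolves its value. The github.com/alecthomas/kong import, the derived --load-to-memory flag name, and the model names are assumptions based on kong's default behavior rather than anything stated in this commit.

// Standalone sketch: a field with the tags used above accepts either env
// alias or a repeated CLI flag; kong derives --load-to-memory from the
// field name (assumed default naming).
package main

import (
	"fmt"

	"github.com/alecthomas/kong"
)

type cli struct {
	LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup"`
}

func main() {
	var c cli
	kong.Parse(&c)
	// e.g. LOCALAI_LOAD_TO_MEMORY=phi-2 ./sketch
	//      ./sketch --load-to-memory phi-2 --load-to-memory llava
	fmt.Println(c.LoadToMemory)
}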
View File

@@ -41,6 +41,7 @@ type ApplicationConfig struct {
 	DisableApiKeyRequirementForHttpGet bool
 	HttpGetExemptedEndpoints []*regexp.Regexp
 	DisableGalleryEndpoint bool
+	LoadToMemory []string
 
 	ModelLibraryURL string
@@ -331,6 +332,12 @@ func WithOpaqueErrors(opaque bool) AppOption {
 	}
 }
 
+func WithLoadToMemory(models []string) AppOption {
+	return func(o *ApplicationConfig) {
+		o.LoadToMemory = models
+	}
+}
+
 func WithSubtleKeyComparison(subtle bool) AppOption {
 	return func(o *ApplicationConfig) {
 		o.UseSubtleKeyComparison = subtle

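A minimal sketch, not part of the commit, showing what the new AppOption does in isolation: WithLoadToMemory only records the requested model names on ApplicationConfig, and core/startup reads the field later. The model name is a placeholder.

// Minimal sketch: the option just stores the list; Startup consumes it.
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/core/config"
)

func main() {
	appConfig := config.NewApplicationConfig(
		config.WithLoadToMemory([]string{"my-model"}), // placeholder model name
	)
	fmt.Println(appConfig.LoadToMemory) // [my-model]
}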
View File

@@ -1,206 +1,243 @@
 package startup
 
 import (
 	"fmt"
 	"os"
 
 	"github.com/mudler/LocalAI/core"
+	"github.com/mudler/LocalAI/core/backend"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/internal"
 	"github.com/mudler/LocalAI/pkg/assets"
 	"github.com/mudler/LocalAI/pkg/library"
 	"github.com/mudler/LocalAI/pkg/model"
 	pkgStartup "github.com/mudler/LocalAI/pkg/startup"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )
 
 func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
 	options := config.NewApplicationConfig(opts...)
 
 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
 	caps, err := xsysinfo.CPUCapabilities()
 	if err == nil {
 		log.Debug().Msgf("CPU capabilities: %v", caps)
 	}
 	gpus, err := xsysinfo.GPUs()
 	if err == nil {
 		log.Debug().Msgf("GPU count: %d", len(gpus))
 		for _, gpu := range gpus {
 			log.Debug().Msgf("GPU: %s", gpu.String())
 		}
 	}
 
 	// Make sure directories exists
 	if options.ModelPath == "" {
 		return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
 	}
 	err = os.MkdirAll(options.ModelPath, 0750)
 	if err != nil {
 		return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
 	}
 	if options.ImageDir != "" {
 		err := os.MkdirAll(options.ImageDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
 		}
 	}
 	if options.AudioDir != "" {
 		err := os.MkdirAll(options.AudioDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
 		}
 	}
 	if options.UploadDir != "" {
 		err := os.MkdirAll(options.UploadDir, 0750)
 		if err != nil {
 			return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
 		}
 	}
 
 	if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}
 
 	cl := config.NewBackendConfigLoader(options.ModelPath)
 	ml := model.NewModelLoader(options.ModelPath)
 
 	configLoaderOpts := options.ToConfigLoaderOptions()
 
 	if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
 		log.Error().Err(err).Msg("error loading config files")
 	}
 
 	if options.ConfigFile != "" {
 		if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
 			log.Error().Err(err).Msg("error loading config file")
 		}
 	}
 
 	if err := cl.Preload(options.ModelPath); err != nil {
 		log.Error().Err(err).Msg("error downloading models")
 	}
 
 	if options.PreloadJSONModels != "" {
 		if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
 			return nil, nil, nil, err
 		}
 	}
 
 	if options.PreloadModelsFromPath != "" {
 		if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
 			return nil, nil, nil, err
 		}
 	}
 
 	if options.Debug {
 		for _, v := range cl.GetAllBackendConfigs() {
 			log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
 		}
 	}
 
 	if options.AssetsDestination != "" {
 		// Extract files from the embedded FS
 		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
 		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
 		if err != nil {
 			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 		}
 	}
 
 	if options.LibPath != "" {
 		// If there is a lib directory, set LD_LIBRARY_PATH to include it
 		err := library.LoadExternal(options.LibPath)
 		if err != nil {
 			log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
 		}
 	}
 
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
 		log.Debug().Msgf("Context canceled, shutting down")
 		err := ml.StopAllGRPC()
 		if err != nil {
 			log.Error().Err(err).Msg("error while stopping all grpc backends")
 		}
 	}()
 
 	if options.WatchDog {
 		wd := model.NewWatchDog(
 			ml,
 			options.WatchDogBusyTimeout,
 			options.WatchDogIdleTimeout,
 			options.WatchDogBusy,
 			options.WatchDogIdle)
 		ml.SetWatchDog(wd)
 		go wd.Run()
 		go func() {
 			<-options.Context.Done()
 			log.Debug().Msgf("Context canceled, shutting down")
 			wd.Shutdown()
 		}()
 	}
 
+	if options.LoadToMemory != nil {
+		for _, m := range options.LoadToMemory {
+			cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
+				config.LoadOptionDebug(options.Debug),
+				config.LoadOptionThreads(options.Threads),
+				config.LoadOptionContextSize(options.ContextSize),
+				config.LoadOptionF16(options.F16),
+				config.ModelPath(options.ModelPath),
+			)
+			if err != nil {
+				return nil, nil, nil, err
+			}
+
+			log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
+
+			grpcOpts := backend.GRPCModelOpts(*cfg)
+			o := []model.Option{
+				model.WithModel(cfg.Model),
+				model.WithAssetDir(options.AssetsDestination),
+				model.WithThreads(uint32(options.Threads)),
+				model.WithLoadGRPCLoadModelOpts(grpcOpts),
+			}
+
+			var backendErr error
+			if cfg.Backend != "" {
+				o = append(o, model.WithBackendString(cfg.Backend))
+				_, backendErr = ml.BackendLoader(o...)
+			} else {
+				_, backendErr = ml.GreedyLoader(o...)
+			}
+			if backendErr != nil {
+				return nil, nil, nil, err
+			}
+		}
+	}
+
 	// Watch the configuration directory
 	startWatcher(options)
 
 	log.Info().Msg("core/startup process completed!")
 	return cl, ml, options, nil
 }
 
 func startWatcher(options *config.ApplicationConfig) {
 	if options.DynamicConfigsDir == "" {
 		// No need to start the watcher if the directory is not set
 		return
 	}
 
 	if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
 		if os.IsNotExist(err) {
 			// We try to create the directory if it does not exist and was specified
 			if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
 				log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
 			}
 		} else {
 			// something else happened, we log the error and don't start the watcher
 			log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
 			return
 		}
 	}
 
 	configHandler := newConfigFileHandler(options)
 	if err := configHandler.Watch(); err != nil {
 		log.Error().Err(err).Msg("failed creating watcher")
 	}
 }
 
 // In Lieu of a proper DI framework, this function wires up the Application manually.
 // This is in core/startup rather than core/state.go to keep package references clean!
 func createApplication(appConfig *config.ApplicationConfig) *core.Application {
 	app := &core.Application{
 		ApplicationConfig:   appConfig,
 		BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
 		ModelLoader:         model.NewModelLoader(appConfig.ModelPath),
 	}
 
 	var err error
 
 	// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 	// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 	// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 	// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 	// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 
 	app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
 	app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
 	// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
 
 	app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
 	if err != nil {
 		log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
 	}
 
 	return app
 }