diff --git a/core/cli/transcript.go b/core/cli/transcript.go index fcd4b309..bf4dce6d 100644 --- a/core/cli/transcript.go +++ b/core/cli/transcript.go @@ -30,7 +30,7 @@ func (t *TranscriptCMD) Run(ctx *cliContext.Context) error { AssetsDestination: t.BackendAssetsPath, } - cl := config.NewBackendConfigLoader() + cl := config.NewBackendConfigLoader(t.ModelsPath) ml := model.NewModelLoader(opts.ModelPath) if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil { return err diff --git a/core/config/application_config.go b/core/config/application_config.go index 50fa627d..46f4f90c 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -306,6 +306,7 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption { LoadOptionDebug(o.Debug), LoadOptionF16(o.F16), LoadOptionThreads(o.Threads), + ModelPath(o.ModelPath), } } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 1ca11716..b9c8dd65 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -21,7 +21,7 @@ type TTSConfig struct { Voice string `yaml:"voice"` // Vall-e-x - VallE VallE `yaml:"vall-e"` + VallE VallE `yaml:"vall-e"` } type BackendConfig struct { @@ -368,6 +368,8 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { if debug { cfg.Debug = &trueV } + + guessDefaultsFromFile(cfg, lo.modelPath) } func (c *BackendConfig) Validate() bool { @@ -400,3 +402,7 @@ func (c *BackendConfig) Validate() bool { return true } + +func (c *BackendConfig) HasTemplate() bool { + return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != "" +} diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 54e33cf8..242c651f 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -19,17 +19,20 @@ import ( ) type BackendConfigLoader struct { - configs map[string]BackendConfig + configs map[string]BackendConfig + modelPath string sync.Mutex } -func NewBackendConfigLoader() *BackendConfigLoader { +func NewBackendConfigLoader(modelPath string) *BackendConfigLoader { return &BackendConfigLoader{ - configs: make(map[string]BackendConfig), + configs: make(map[string]BackendConfig), + modelPath: modelPath, } } type LoadOptions struct { + modelPath string debug bool threads, ctxSize int f16 bool @@ -53,6 +56,12 @@ func LoadOptionContextSize(ctxSize int) ConfigLoaderOption { } } +func ModelPath(modelPath string) ConfigLoaderOption { + return func(o *LoadOptions) { + o.modelPath = modelPath + } +} + func LoadOptionF16(f16 bool) ConfigLoaderOption { return func(o *LoadOptions) { o.f16 = f16 diff --git a/core/config/config_test.go b/core/config/config_test.go index da8ba09d..5122c907 100644 --- a/core/config/config_test.go +++ b/core/config/config_test.go @@ -26,7 +26,7 @@ var _ = Describe("Test cases for config related functions", func() { It("Test LoadConfigs", func() { - bcl := NewBackendConfigLoader() + bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH")) err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH")) Expect(err).To(BeNil()) diff --git a/core/config/guesser.go b/core/config/guesser.go new file mode 100644 index 00000000..e8cfa7be --- /dev/null +++ b/core/config/guesser.go @@ -0,0 +1,87 @@ +package config + +import ( + "os" + "path/filepath" + + "github.com/rs/zerolog/log" + + gguf "github.com/thxcode/gguf-parser-go" +) + +type familyType uint8 + +const ( + Unknown familyType = iota + LLaMa3 = iota + LLama2 = iota +) + +var defaultsTemplate map[familyType]TemplateConfig = map[familyType]TemplateConfig{ + LLaMa3: { + Chat: "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>", + ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>", + }, +} + +func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { + + if os.Getenv("LOCALAI_DISABLE_GUESSING") == "true" { + log.Debug().Msgf("guessDefaultsFromFile: %s", "guessing disabled with LOCALAI_DISABLE_GUESSING") + return + } + + if modelPath == "" { + log.Debug().Msgf("guessDefaultsFromFile: %s", "modelPath is empty") + return + } + + if cfg.HasTemplate() { + // nothing to guess here + log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set") + return + } + + // We try to guess only if we don't have a template defined already + f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName())) + if err != nil { + // Only valid for gguf files + log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file") + return + } + + log.Debug(). + Any("eosTokenID", f.Tokenizer().EOSTokenID). + Any("bosTokenID", f.Tokenizer().BOSTokenID). + Any("modelName", f.Model().Name). + Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName()) + + // guess the name + if cfg.Name == "" { + cfg.Name = f.Model().Name + } + + family := identifyFamily(f) + + if family == Unknown { + log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified") + return + } + + templ, ok := defaultsTemplate[family] + if ok { + cfg.TemplateConfig = templ + log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig) + } else { + log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") + } +} + +func identifyFamily(f *gguf.GGUFFile) familyType { + switch { + case f.Architecture().Architecture == "llama" && f.Tokenizer().EOSTokenID == 128009: + return LLaMa3 + } + + return Unknown +} diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index 941a66e3..728645d7 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -289,6 +289,7 @@ func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *c config.LoadOptionThreads(threads), config.LoadOptionContextSize(ctx), config.LoadOptionF16(f16), + config.ModelPath(loader.ModelPath), ) // Set the parameters for the language model prediction diff --git a/core/startup/startup.go b/core/startup/startup.go index 6e6c6707..c276e4ac 100644 --- a/core/startup/startup.go +++ b/core/startup/startup.go @@ -62,7 +62,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode // pkgStartup.PreloadModelsConfigurations(options.ModelLibraryURL, options.ModelPath, options.ModelsURL...) - cl := config.NewBackendConfigLoader() + cl := config.NewBackendConfigLoader(options.ModelPath) ml := model.NewModelLoader(options.ModelPath) configLoaderOpts := options.ToConfigLoaderOptions() @@ -151,7 +151,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode func createApplication(appConfig *config.ApplicationConfig) *core.Application { app := &core.Application{ ApplicationConfig: appConfig, - BackendConfigLoader: config.NewBackendConfigLoader(), + BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath), ModelLoader: model.NewModelLoader(appConfig.ModelPath), } diff --git a/go.mod b/go.mod index 393608d5..2a1d39d3 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/go-skynet/LocalAI -go 1.21.1 +go 1.22 toolchain go1.22.2 @@ -67,6 +67,7 @@ require ( github.com/creachadair/otp v0.4.2 // indirect github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/elastic/gosigar v0.14.2 // indirect github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect @@ -79,6 +80,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/henvic/httpretty v0.1.3 // indirect github.com/huin/goupnp v1.2.0 // indirect github.com/ipfs/boxo v0.10.0 // indirect github.com/ipfs/go-cid v0.4.1 // indirect @@ -132,8 +134,10 @@ require ( github.com/quic-go/quic-go v0.38.1 // indirect github.com/quic-go/webtransport-go v0.5.3 // indirect github.com/raulk/go-watchdog v1.3.0 // indirect + github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/thxcode/gguf-parser-go v0.0.6 // indirect github.com/tinylib/msgp v1.1.8 // indirect github.com/vishvananda/netlink v1.1.0 // indirect github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect @@ -143,8 +147,8 @@ require ( go.uber.org/fx v1.20.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // indirect - golang.org/x/sync v0.6.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect + golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.20.0 // indirect golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect @@ -252,11 +256,11 @@ require ( go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/crypto v0.23.0 // indirect - golang.org/x/mod v0.16.0 // indirect + golang.org/x/mod v0.17.0 // indirect golang.org/x/net v0.25.0 // indirect golang.org/x/term v0.20.0 // indirect golang.org/x/text v0.15.0 // indirect - golang.org/x/tools v0.19.0 // indirect + golang.org/x/tools v0.21.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect diff --git a/go.sum b/go.sum index 792b9175..4413aea0 100644 --- a/go.sum +++ b/go.sum @@ -108,6 +108,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4= github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= @@ -262,6 +264,8 @@ github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+l github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8= +github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= @@ -612,6 +616,8 @@ github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5k github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c= +github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0= github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs= github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8= @@ -647,6 +653,10 @@ github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0J github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= +github.com/thxcode/gguf-parser-go v0.0.5 h1:fYjrrQ6DFMTOCxP5iWolFIgAS9uB6Lj0MLsabYd+WUA= +github.com/thxcode/gguf-parser-go v0.0.5/go.mod h1:xHPU1OI4c0KHVTGYjTZIkLRJhBZUb9wDTFYFvkRXo9M= +github.com/thxcode/gguf-parser-go v0.0.6 h1:2lbnqA9r/4kyfOUZxy3VWRP60IkfNb31l57GmzOzYKE= +github.com/thxcode/gguf-parser-go v0.0.6/go.mod h1:xHPU1OI4c0KHVTGYjTZIkLRJhBZUb9wDTFYFvkRXo9M= github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= @@ -751,6 +761,8 @@ golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= @@ -766,6 +778,8 @@ golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -808,6 +822,8 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -892,6 +908,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=