fix(shutdown): do not shutdown immediately busy backends (#3543)

* fix(shutdown): do not shutdown immediately busy backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(refactor): avoid duplicate functions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: multiplicative backoff for shutdown (#3547)

* multiplicative backoff for shutdown

Rather than always retry every two seconds, back off the shutdown attempt rate? 

Signed-off-by: Dave <dave@gray101.com>

* Update loader.go

Signed-off-by: Dave <dave@gray101.com>

* add clamp of 2 minutes

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
This commit is contained in:
Ettore Di Giacinto 2024-09-17 06:50:57 +02:00 committed by GitHub
parent 0e4e101101
commit d0f2bf3181
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 15 deletions

View File

@ -69,6 +69,8 @@ var knownModelsNameSuffixToSkip []string = []string{
".tar.gz",
}
const retryTimeout = time.Duration(2 * time.Minute)
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
files, err := os.ReadDir(ml.ModelPath)
if err != nil {
@ -146,15 +148,23 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
ml.mu.Lock()
defer ml.mu.Unlock()
return ml.stopModel(modelName)
}
func (ml *ModelLoader) stopModel(modelName string) error {
defer ml.deleteProcess(modelName)
if _, ok := ml.models[modelName]; !ok {
_, ok := ml.models[modelName]
if !ok {
return fmt.Errorf("model %s not found", modelName)
}
return nil
retries := 1
for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", modelName)
dur := time.Duration(retries*2) * time.Second
if dur > retryTimeout {
dur = retryTimeout
}
time.Sleep(dur)
retries++
}
return ml.deleteProcess(modelName)
}
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {

View File

@ -18,15 +18,16 @@ import (
func (ml *ModelLoader) StopAllExcept(s string) error {
return ml.StopGRPC(func(id string, p *process.Process) bool {
if id != s {
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", id)
time.Sleep(2 * time.Second)
}
log.Debug().Msgf("[single-backend] Stopping %s", id)
return true
if id == s {
return false
}
return false
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", id)
time.Sleep(2 * time.Second)
}
log.Debug().Msgf("[single-backend] Stopping %s", id)
return true
})
}