mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
fix(shutdown): do not shutdown immediately busy backends (#3543)
* fix(shutdown): do not shutdown immediately busy backends

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(refactor): avoid duplicate functions

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: multiplicative backoff for shutdown (#3547)

  * multiplicative backoff for shutdown

    Rather than always retry every two seconds, back off the shutdown attempt rate?

    Signed-off-by: Dave <dave@gray101.com>

  * Update loader.go

    Signed-off-by: Dave <dave@gray101.com>

  * add clamp of 2 minutes

    Signed-off-by: Dave Lee <dave@gray101.com>

  ---------

  Signed-off-by: Dave <dave@gray101.com>
  Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
This commit is contained in:
parent
0e4e101101
commit
d0f2bf3181
@@ -69,6 +69,8 @@ var knownModelsNameSuffixToSkip []string = []string{
|
||||
".tar.gz",
|
||||
}
|
||||
|
||||
const retryTimeout = time.Duration(2 * time.Minute)
|
||||
|
||||
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
|
||||
files, err := os.ReadDir(ml.ModelPath)
|
||||
if err != nil {
|
||||
@@ -146,15 +148,23 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
return ml.stopModel(modelName)
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) stopModel(modelName string) error {
|
||||
defer ml.deleteProcess(modelName)
|
||||
if _, ok := ml.models[modelName]; !ok {
|
||||
_, ok := ml.models[modelName]
|
||||
if !ok {
|
||||
return fmt.Errorf("model %s not found", modelName)
|
||||
}
|
||||
return nil
|
||||
|
||||
retries := 1
|
||||
for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
|
||||
log.Debug().Msgf("%s busy. Waiting.", modelName)
|
||||
dur := time.Duration(retries*2) * time.Second
|
||||
if dur > retryTimeout {
|
||||
dur = retryTimeout
|
||||
}
|
||||
time.Sleep(dur)
|
||||
retries++
|
||||
}
|
||||
|
||||
return ml.deleteProcess(modelName)
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
||||
|
@@ -18,15 +18,16 @@ import (
|
||||
|
||||
func (ml *ModelLoader) StopAllExcept(s string) error {
|
||||
return ml.StopGRPC(func(id string, p *process.Process) bool {
|
||||
if id != s {
|
||||
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
|
||||
log.Debug().Msgf("%s busy. Waiting.", id)
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
log.Debug().Msgf("[single-backend] Stopping %s", id)
|
||||
return true
|
||||
if id == s {
|
||||
return false
|
||||
}
|
||||
return false
|
||||
|
||||
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
|
||||
log.Debug().Msgf("%s busy. Waiting.", id)
|
||||
time.Sleep(2 * time.Second)
|
||||
}
|
||||
log.Debug().Msgf("[single-backend] Stopping %s", id)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user