fix(shutdown): do not shutdown immediately busy backends (#3543)

* fix(shutdown): do not shutdown immediately busy backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(refactor): avoid duplicate functions

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix: multiplicative backoff for shutdown (#3547)

* multiplicative backoff for shutdown

Rather than always retry every two seconds, back off the shutdown attempt rate?

Signed-off-by: Dave <dave@gray101.com>

* Update loader.go

Signed-off-by: Dave <dave@gray101.com>

* add clamp of 2 minutes

Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Dave <dave@gray101.com>
Signed-off-by: Dave Lee <dave@gray101.com>
Co-authored-by: Dave <dave@gray101.com>
2025-05-28 21:14:15 +00:00 · 2024-09-17 06:50:57 +02:00 · 2024-09-17 06:50:57 +02:00 · d0f2bf3181
commit d0f2bf3181
parent 0e4e101101
2 changed files with 26 additions and 15 deletions
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -69,6 +69,8 @@ var knownModelsNameSuffixToSkip []string = []string{
 	".tar.gz",
 }

+const retryTimeout = time.Duration(2 * time.Minute)
+
 func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
 	files, err := os.ReadDir(ml.ModelPath)
 	if err != nil {
@@ -146,15 +148,23 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()

-	return ml.stopModel(modelName)
-}
-
-func (ml *ModelLoader) stopModel(modelName string) error {
-	defer ml.deleteProcess(modelName)
-	if _, ok := ml.models[modelName]; !ok {
+	_, ok := ml.models[modelName]
+	if !ok {
 		return fmt.Errorf("model %s not found", modelName)
 	}
-	return nil
+
+	retries := 1
+	for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
+		log.Debug().Msgf("%s busy. Waiting.", modelName)
+		dur := time.Duration(retries*2) * time.Second
+		if dur > retryTimeout {
+			dur = retryTimeout
+		}
+		time.Sleep(dur)
+		retries++
+	}
+
+	return ml.deleteProcess(modelName)
 }

 func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
--- a/pkg/model/process.go
+++ b/pkg/model/process.go
@@ -18,15 +18,16 @@ import (

 func (ml *ModelLoader) StopAllExcept(s string) error {
 	return ml.StopGRPC(func(id string, p *process.Process) bool {
-		if id != s {
-			for ml.models[id].GRPC(false, ml.wd).IsBusy() {
-				log.Debug().Msgf("%s busy. Waiting.", id)
-				time.Sleep(2 * time.Second)
-			}
-			log.Debug().Msgf("[single-backend] Stopping %s", id)
-			return true
+		if id == s {
+			return false
 		}
-		return false
+
+		for ml.models[id].GRPC(false, ml.wd).IsBusy() {
+			log.Debug().Msgf("%s busy. Waiting.", id)
+			time.Sleep(2 * time.Second)
+		}
+		log.Debug().Msgf("[single-backend] Stopping %s", id)
+		return true
 	})
 }