feat(loader): enhance single active backend by treating as singleton (#5107)

feat(loader): enhance single active backend by treating as singleton

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 15:08:08 +00:00 · 2025-04-01 20:58:11 +02:00
parent c59975ab05
commit 2c425e9c69
24 changed files with 92 additions and 71 deletions
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@ -509,7 +509,23 @@ func (ml *ModelLoader) stopActiveBackends(modelID string, singleActiveBackend bo
 	}
 }

+func (ml *ModelLoader) Close() { // Close unlocks singletonLock when singletonMode is enabled; otherwise it does nothing.
+	if !ml.singletonMode { // lockBackend takes no lock outside singleton mode, so there is nothing to release
+		return
+	}
+	ml.singletonLock.Unlock() // NOTE(review): presumably a sync.Mutex; if so this must only run after a matching lockBackend() - confirm callers
+}
+
+func (ml *ModelLoader) lockBackend() { // lockBackend locks singletonLock when singletonMode is enabled; otherwise it does nothing.
+	if !ml.singletonMode { // no locking is performed outside singleton mode
+		return
+	}
+	ml.singletonLock.Lock() // the matching Unlock is performed by Close()
+}
+
 func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
+	ml.lockBackend() // grab the singleton lock if needed
+
 	o := NewOptions(opts...)

 	// Return earlier if we have a model already loaded
@ -520,7 +536,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 		return m.GRPC(o.parallelRequests, ml.wd), nil
 	}

-	ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
+	ml.stopActiveBackends(o.modelID, ml.singletonMode)

 	// if a backend is defined, return the loader directly
 	if o.backendString != "" {
@ -533,6 +549,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 	// get backends embedded in the binary
 	autoLoadBackends, err := ml.ListAvailableBackends(o.assetDir)
 	if err != nil {
+		ml.Close() // we failed, release the lock
 		return nil, err
 	}

@ -564,5 +581,7 @@ func (ml *ModelLoader) Load(opts ...Option) (grpc.Backend, error) {
 		}
 	}

+	ml.Close() // make sure to release the lock in case of failure
+
 	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
 }