fix(initializer): do select backends that exist (#2694)

We were not checking whether the backend binary actually exists before
picking it up from the asset directory.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-07-01 22:50:36 +02:00 committed by GitHub
parent bd2f95c130
commit e591ff2e74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 9 deletions

View File

@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
 # Rebuild with defaults backends
 WORKDIR /build
+
+## Build the binary
 RUN make build
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \

View File

@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	}
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		p := backendPath(assetDir, LLamaCPPAVX2)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		p := backendPath(assetDir, LLamaCPPAVX)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = p
+		}
 	} else {
-		log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		p := backendPath(assetDir, LLamaCPPFallback)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = p
+		}
 	}

 	return grpcProcess
@@ -511,11 +520,23 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 		}
 		if autoDetect && key == LLamaCPP && err != nil {
-			backendToUse := LLamaCPPFallback
+			// try as hard as possible to run the llama.cpp variants
+			backendToUse := ""
 			if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-				backendToUse = LLamaCPPAVX2
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX2
+				}
 			} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-				backendToUse = LLamaCPPAVX
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX
+				}
+			} else {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
+					backendToUse = LLamaCPPFallback
+				} else {
+					// If we don't have a fallback, just skip fallback
+					continue
+				}
 			}

 			// Autodetection failed, try the fallback