From e591ff2e743dc64e5d76e8e3b4c4b9bb60217bca Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 1 Jul 2024 22:50:36 +0200
Subject: [PATCH] fix(initializer): do select backends that exist (#2694)

We were not checking whether the backend binary exists before picking it
up from the asset dir.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 Dockerfile                |  2 ++
 pkg/model/initializers.go | 39 ++++++++++++++++++++++++++++++---------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index eb5c9b05..ac42db5d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local
 
 # Rebuild with defaults backends
 WORKDIR /build
+
+## Build the binary
 RUN make build
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 92b3c0a0..901b4d99 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 	}
 
 	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		p := backendPath(assetDir, LLamaCPPAVX2)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = p
+		}
 	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		p := backendPath(assetDir, LLamaCPPAVX)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = p
+		}
 	} else {
-		log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-		grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		p := backendPath(assetDir, LLamaCPPFallback)
+		if _, err := os.Stat(p); err == nil {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = p
+		}
 	}
 
 	return grpcProcess
@@ -511,11 +520,23 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 		}
 
 		if autoDetect && key == LLamaCPP && err != nil {
-			backendToUse := LLamaCPPFallback
+			// try as hard as possible to run the llama.cpp variants
+			backendToUse := ""
 			if xsysinfo.HasCPUCaps(cpuid.AVX2) {
-				backendToUse = LLamaCPPAVX2
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
+					backendToUse = LLamaCPPAVX2
+				}
 			} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
-				backendToUse = LLamaCPPAVX
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX)); err == nil {
+					backendToUse = LLamaCPPAVX
+				}
+			} else {
+				if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
+					backendToUse = LLamaCPPFallback
+				} else {
+					// If we don't have a fallback, just skip fallback
+					continue
+				}
 			}
 
 			// Autodetection failed, try the fallback