feat(llama.cpp/clip): inject gpu options if we detect GPUs (#5243)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-05-09 20:13:17 +00:00 · 2025-04-26 00:04:47 +02:00 · 2025-04-26 00:04:47 +02:00 · 9628860c0e
commit 9628860c0e
parent cae9bf1308
3 changed files with 41 additions and 44 deletions
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 	gguf "github.com/thxcode/gguf-parser-go"
 )
@ -35,4 +36,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
 		}
 		cfg.ContextSize = &defaultCtx
 	}
 	if cfg.Options == nil {
 		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
 			cfg.Options = []string{"gpu"}
 		}
 	}
 }
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@ -181,10 +181,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
 // selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
 // Note: this is now relevant only for llama.cpp
 func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
 	foundCUDA := false
 	foundAMDGPU := false
 	foundIntelGPU := false
 	var grpcProcess string
 	// Select backend now just for llama.cpp
 	if backend != LLamaCPP {
@ -198,48 +194,24 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 	}
 	// Check for GPU-binaries that are shipped with single binary releases
-	gpus, err := xsysinfo.GPUs()
+	gpuBinaries := map[string]string{
-	if err == nil {
+		"nvidia": LLamaCPPCUDA,
-		for _, gpu := range gpus {
+		"amd":    LLamaCPPHipblas,
-			if strings.Contains(gpu.String(), "nvidia") {
+		"intel":  LLamaCPPSycl16,
 				p := backendPath(assetDir, LLamaCPPCUDA)
 				if _, err := os.Stat(p); err == nil {
 					log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
 					grpcProcess = p
 					foundCUDA = true
 				} else {
 					log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
 				}
 			}
 			if strings.Contains(gpu.String(), "amd") {
 				p := backendPath(assetDir, LLamaCPPHipblas)
 				if _, err := os.Stat(p); err == nil {
 					log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
 					grpcProcess = p
 					foundAMDGPU = true
 				} else {
 					log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
 				}
 			}
 			if strings.Contains(gpu.String(), "intel") {
 				backend := LLamaCPPSycl16
 				if !f16 {
 					backend = LLamaCPPSycl32
 				}
 				p := backendPath(assetDir, backend)
 				if _, err := os.Stat(p); err == nil {
 					log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
 					grpcProcess = p
 					foundIntelGPU = true
 				} else {
 					log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
 				}
 			}
 		}
 	}
-	if foundCUDA || foundAMDGPU || foundIntelGPU {
+	if !f16 {
-		return grpcProcess
+		gpuBinaries["intel"] = LLamaCPPSycl32
 	}
 	for vendor, binary := range gpuBinaries {
 		if xsysinfo.HasGPU(vendor) {
 			p := backendPath(assetDir, binary)
 			if _, err := os.Stat(p); err == nil {
 				log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
 				return p
 			}
 		}
 	}
 	// No GPU found or no specific binaries found, try to load the CPU variant(s)
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@ -1,6 +1,8 @@
 package xsysinfo
 import (
 	"strings"
 	"github.com/jaypipes/ghw"
 	"github.com/jaypipes/ghw/pkg/gpu"
 )
@ -13,3 +15,19 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
 	return gpu.GraphicsCards, nil
 }
 // HasGPU reports whether the host exposes a graphics card matching vendor.
 //
 // The cards are obtained from GPUs(); if that call fails, HasGPU returns
 // false. An empty vendor matches any card, so the result is true whenever at
 // least one card was listed. Otherwise a card matches when its String()
 // description contains vendor as a case-sensitive substring.
 func HasGPU(vendor string) bool {
 	cards, err := GPUs()
 	switch {
 	case err != nil:
 		// Detection failed: treat it the same as "no GPU present".
 		return false
 	case vendor == "":
 		// No vendor filter requested: any detected card counts.
 		return len(cards) > 0
 	}
 
 	for i := range cards {
 		if description := cards[i].String(); strings.Contains(description, vendor) {
 			return true
 		}
 	}
 	return false
 }