diff --git a/core/config/guesser.go b/core/config/guesser.go
index b7fb23de..e66df70d 100644
--- a/core/config/guesser.go
+++ b/core/config/guesser.go
@@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 
+	"github.com/mudler/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 	gguf "github.com/thxcode/gguf-parser-go"
 )
@@ -35,4 +36,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
 		}
 		cfg.ContextSize = &defaultCtx
 	}
+
+	if cfg.Options == nil {
+		if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
+			cfg.Options = []string{"gpu"}
+		}
+	}
 }
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 1a7fdc9c..a0d0d5fc 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -181,10 +181,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
 // selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
 // Note: this is now relevant only for llama.cpp
 func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
-	foundCUDA := false
-	foundAMDGPU := false
-	foundIntelGPU := false
-	var grpcProcess string
 
 	// Select backend now just for llama.cpp
 	if backend != LLamaCPP {
@@ -198,48 +194,24 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
 	}
 
 	// Check for GPU-binaries that are shipped with single binary releases
-	gpus, err := xsysinfo.GPUs()
-	if err == nil {
-		for _, gpu := range gpus {
-			if strings.Contains(gpu.String(), "nvidia") {
-				p := backendPath(assetDir, LLamaCPPCUDA)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
-					grpcProcess = p
-					foundCUDA = true
-				} else {
-					log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
-				}
-			}
-			if strings.Contains(gpu.String(), "amd") {
-				p := backendPath(assetDir, LLamaCPPHipblas)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
-					grpcProcess = p
-					foundAMDGPU = true
-				} else {
-					log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
-				}
-			}
-			if strings.Contains(gpu.String(), "intel") {
-				backend := LLamaCPPSycl16
-				if !f16 {
-					backend = LLamaCPPSycl32
-				}
-				p := backendPath(assetDir, backend)
-				if _, err := os.Stat(p); err == nil {
-					log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
-					grpcProcess = p
-					foundIntelGPU = true
-				} else {
-					log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
-				}
-			}
-		}
+	gpuBinaries := map[string]string{
+		"nvidia": LLamaCPPCUDA,
+		"amd":    LLamaCPPHipblas,
+		"intel":  LLamaCPPSycl16,
 	}
 
-	if foundCUDA || foundAMDGPU || foundIntelGPU {
-		return grpcProcess
+	if !f16 {
+		gpuBinaries["intel"] = LLamaCPPSycl32
+	}
+
+	for vendor, binary := range gpuBinaries {
+		if xsysinfo.HasGPU(vendor) {
+			p := backendPath(assetDir, binary)
+			if _, err := os.Stat(p); err == nil {
+				log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
+				return p
+			}
+		}
 	}
 
 	// No GPU found or no specific binaries found, try to load the CPU variant(s)
diff --git a/pkg/xsysinfo/gpu.go b/pkg/xsysinfo/gpu.go
index b6321e81..a692c775 100644
--- a/pkg/xsysinfo/gpu.go
+++ b/pkg/xsysinfo/gpu.go
@@ -1,6 +1,8 @@
 package xsysinfo
 
 import (
+	"strings"
+
 	"github.com/jaypipes/ghw"
 	"github.com/jaypipes/ghw/pkg/gpu"
 )
@@ -13,3 +15,19 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
 
 	return gpu.GraphicsCards, nil
 }
+
+func HasGPU(vendor string) bool {
+	gpus, err := GPUs()
+	if err != nil {
+		return false
+	}
+	if vendor == "" {
+		return len(gpus) > 0
+	}
+	for _, gpu := range gpus {
+		if strings.Contains(gpu.String(), vendor) {
+			return true
+		}
+	}
+	return false
+}
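
A note on the new `gpuBinaries` lookup in `selectGRPCProcessByHostCapabilities`: Go intentionally randomizes map iteration order, so on a host exposing cards from more than one vendor, which variant wins can differ across restarts (the removed code had a similar last-match-wins ambiguity, so this is not a regression). If a fixed priority is ever wanted, a slice keeps the order stable. A minimal sketch of that alternative, not what this diff implements; `orderedGPUBinaries` and `gpuBinary` are hypothetical names, while the `LLamaCPP*` constants are the ones used above:

```go
// Hypothetical helper in the same package as initializers.go.
type gpuBinary struct {
	vendor string
	binary string
}

// orderedGPUBinaries returns candidates in a fixed priority order so that
// multi-vendor hosts always probe NVIDIA, then AMD, then Intel.
func orderedGPUBinaries(f16 bool) []gpuBinary {
	intel := LLamaCPPSycl16
	if !f16 {
		intel = LLamaCPPSycl32
	}
	return []gpuBinary{
		{"nvidia", LLamaCPPCUDA},
		{"amd", LLamaCPPHipblas},
		{"intel", intel},
	}
}
```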
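For reference, a minimal sketch of how the new `xsysinfo.HasGPU` helper behaves, following the implementation above: matching is a case-sensitive substring check against the card description ghw reports (the removed code relied on the same lowercase vendor names), and an empty vendor string reports whether any GPU was enumerated at all:

```go
package main

import (
	"fmt"

	"github.com/mudler/LocalAI/pkg/xsysinfo"
)

func main() {
	// Lowercase vendor substrings, mirroring the checks this diff
	// moves out of initializers.go and centralizes in xsysinfo.
	for _, vendor := range []string{"nvidia", "amd", "intel"} {
		fmt.Printf("%s GPU detected: %v\n", vendor, xsysinfo.HasGPU(vendor))
	}

	// Empty vendor: true if ghw enumerated any graphics card.
	fmt.Println("any GPU detected:", xsysinfo.HasGPU(""))
}
```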