feat(llama.cpp/clip): inject gpu options if we detect GPUs (#5243)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-04-26 00:04:47 +02:00 committed by GitHub
parent cae9bf1308
commit 9628860c0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 41 additions and 44 deletions

View File

@ -4,6 +4,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
gguf "github.com/thxcode/gguf-parser-go" gguf "github.com/thxcode/gguf-parser-go"
) )
@ -35,4 +36,10 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
} }
cfg.ContextSize = &defaultCtx cfg.ContextSize = &defaultCtx
} }
if cfg.Options == nil {
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
cfg.Options = []string{"gpu"}
}
}
} }

View File

@ -181,10 +181,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
// selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities // selectGRPCProcessByHostCapabilities selects the GRPC process to start based on system capabilities
// Note: this is now relevant only for llama.cpp // Note: this is now relevant only for llama.cpp
func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string { func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) string {
foundCUDA := false
foundAMDGPU := false
foundIntelGPU := false
var grpcProcess string
// Select backend now just for llama.cpp // Select backend now just for llama.cpp
if backend != LLamaCPP { if backend != LLamaCPP {
@ -198,48 +194,24 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
} }
// Check for GPU-binaries that are shipped with single binary releases // Check for GPU-binaries that are shipped with single binary releases
gpus, err := xsysinfo.GPUs() gpuBinaries := map[string]string{
if err == nil { "nvidia": LLamaCPPCUDA,
for _, gpu := range gpus { "amd": LLamaCPPHipblas,
if strings.Contains(gpu.String(), "nvidia") { "intel": LLamaCPPSycl16,
p := backendPath(assetDir, LLamaCPPCUDA)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
grpcProcess = p
foundCUDA = true
} else {
log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
}
}
if strings.Contains(gpu.String(), "amd") {
p := backendPath(assetDir, LLamaCPPHipblas)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
grpcProcess = p
foundAMDGPU = true
} else {
log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
}
}
if strings.Contains(gpu.String(), "intel") {
backend := LLamaCPPSycl16
if !f16 {
backend = LLamaCPPSycl32
}
p := backendPath(assetDir, backend)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with Intel variant", backend)
grpcProcess = p
foundIntelGPU = true
} else {
log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
}
}
}
} }
if foundCUDA || foundAMDGPU || foundIntelGPU { if !f16 {
return grpcProcess gpuBinaries["intel"] = LLamaCPPSycl32
}
for vendor, binary := range gpuBinaries {
if xsysinfo.HasGPU(vendor) {
p := backendPath(assetDir, binary)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with %s variant (vendor: %s)", backend, binary, vendor)
return p
}
}
} }
// No GPU found or no specific binaries found, try to load the CPU variant(s) // No GPU found or no specific binaries found, try to load the CPU variant(s)

View File

@ -1,6 +1,8 @@
package xsysinfo package xsysinfo
import ( import (
"strings"
"github.com/jaypipes/ghw" "github.com/jaypipes/ghw"
"github.com/jaypipes/ghw/pkg/gpu" "github.com/jaypipes/ghw/pkg/gpu"
) )
@ -13,3 +15,19 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
return gpu.GraphicsCards, nil return gpu.GraphicsCards, nil
} }
// HasGPU reports whether at least one graphics card returned by GPUs matches
// the given vendor.
//
// A card matches when its String() description contains vendor as a
// case-sensitive substring. An empty vendor matches any card, so the result
// is true whenever at least one card was returned. If GPUs returns an error,
// HasGPU returns false.
func HasGPU(vendor string) bool {
	cards, err := GPUs()
	switch {
	case err != nil:
		// Detection failed: report that no GPU is available.
		return false
	case vendor == "":
		// No vendor filter: any detected card counts.
		return len(cards) > 0
	}

	for i := range cards {
		if desc := cards[i].String(); strings.Contains(desc, vendor) {
			return true
		}
	}
	return false
}