mirror of
https://github.com/mudler/LocalAI.git
synced 2025-01-18 10:46:46 +00:00
fix(llama.cpp): consider also native builds (#3839)
This is in order to identify also builds which are not using alternatives based on capabilities. For instance, there are cases when we build the backend only natively in the host. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
97cf028175
commit
b82577d642
@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
|
||||
|
||||
// No GPU found or no specific binaries found, try to load the CPU variant(s)
|
||||
|
||||
// Select the Fallback by default
|
||||
selectedProcess := backendPath(assetDir, LLamaCPPFallback)
|
||||
// Select a binary based on availability/capability
|
||||
selectedProcess := ""
|
||||
|
||||
// Check if the fallback build (LLamaCPPFallback) is available and use it as the baseline
|
||||
if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
|
||||
log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
|
||||
selectedProcess = backendPath(assetDir, LLamaCPPFallback)
|
||||
}
|
||||
|
||||
// Check if we have a native build (llama-cpp) and use that instead
|
||||
// As a reminder, we do ultimately attempt again with the fallback variant
|
||||
// If things fail with what we select here
|
||||
if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
|
||||
log.Debug().Msgf("[%s] attempting to load with native variant", backend)
|
||||
selectedProcess = backendPath(assetDir, LLamaCPP)
|
||||
}
|
||||
|
||||
// IF we find any optimized binary, we use that
|
||||
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
|
||||
@ -269,7 +283,7 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the binary exists!
|
||||
// Safety measure: check if the binary exists otherwise return empty string
|
||||
if _, err := os.Stat(selectedProcess); err == nil {
|
||||
return selectedProcess
|
||||
}
|
||||
@ -277,6 +291,21 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
|
||||
return ""
|
||||
}
|
||||
|
||||
func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
|
||||
// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
|
||||
// We failed somehow starting the binary. For instance, could be that we are missing
|
||||
// some libraries if running in binary-only mode.
|
||||
// In this case, we attempt to load the model with the fallback variant.
|
||||
|
||||
// If not llama-cpp backend, return the error immediately
|
||||
if backend != LLamaCPP {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
|
||||
return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
|
||||
}
|
||||
|
||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||
// It also loads the model
|
||||
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
|
||||
@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
|
||||
|
||||
model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
|
||||
if err != nil {
|
||||
// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
|
||||
// We failed somehow starting the binary. For instance, could be that we are missing
|
||||
// some libraries if running in binary-only mode.
|
||||
// In this case, we attempt to load the model with the fallback variant.
|
||||
|
||||
// If not llama-cpp backend, return error immediately
|
||||
if backend != LLamaCPP {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Otherwise attempt with fallback
|
||||
log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
|
||||
model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
|
||||
model, err = attemptLoadingOnFailure(backend, ml, o, err)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user