feat(backend): fallback with autodetect (#2693)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-07-01 18:11:04 +02:00 committed by GitHub
parent ad85c5a1e7
commit bd2f95c130
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -509,6 +509,27 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
err = errors.Join(err, fmt.Errorf("backend %s returned no usable model", key))
log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
}
if autoDetect && key == LLamaCPP && err != nil {
backendToUse := LLamaCPPFallback
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
backendToUse = LLamaCPPAVX2
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
backendToUse = LLamaCPPAVX
}
// Autodetection failed, try the fallback
log.Info().Msgf("[%s] Autodetection failed, trying the fallback", key)
options = append(options, WithBackendString(backendToUse))
model, modelerr = ml.BackendLoader(options...)
if modelerr == nil && model != nil {
log.Info().Msgf("[%s] Loads OK", key)
return model, nil
} else {
err = errors.Join(err, fmt.Errorf("[%s]: %w", key, modelerr))
log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
}
}
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())