From c89271b2e45712f0b2ec4c6fa11e0c301ca0002d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 15 May 2024 01:17:02 +0200 Subject: [PATCH] feat(llama.cpp): add distributed llama.cpp inferencing (#2324) * feat(llama.cpp): support distributed llama.cpp Signed-off-by: Ettore Di Giacinto * feat: let tweak how chat messages are merged together Signed-off-by: Ettore Di Giacinto * refactor Signed-off-by: Ettore Di Giacinto * Makefile: register to ALL_GRPC_BACKENDS Signed-off-by: Ettore Di Giacinto * refactoring, allow disable auto-detection of backends Signed-off-by: Ettore Di Giacinto * minor fixups Signed-off-by: mudler * feat: add cmd to start rpc-server from llama.cpp Signed-off-by: mudler * ci: add ccache Signed-off-by: mudler --------- Signed-off-by: Ettore Di Giacinto Signed-off-by: mudler --- .env | 5 + .github/workflows/release.yaml | 4 +- Dockerfile | 1 + Makefile | 17 ++- backend/cpp/llama/grpc-server.cpp | 6 + core/cli/cli.go | 9 +- core/cli/llamacppworker.go | 37 ++++++ core/config/backend_config.go | 33 +++++- core/http/endpoints/openai/chat.go | 7 +- pkg/assets/extract.go | 6 +- pkg/model/initializers.go | 179 ++++++++++++++++++----------- 11 files changed, 222 insertions(+), 82 deletions(-) create mode 100644 core/cli/llamacppworker.go diff --git a/.env b/.env index ea2d4e35..95a515bc 100644 --- a/.env +++ b/.env @@ -71,6 +71,11 @@ ### Define the number of parallel LLAMA.cpp workers (Defaults to 1) # LLAMACPP_PARALLEL=1 +### Define a list of GRPC Servers for llama-cpp workers to distribute the load +# https://github.com/ggerganov/llama.cpp/pull/6829 +# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md +# LLAMACPP_GRPC_SERVERS="" + ### Enable to run parallel requests # LOCALAI_PARALLEL_REQUESTS=true diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 30b6d950..0245725d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -29,7 +29,7 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential ffmpeg protobuf-compiler + sudo apt-get install build-essential ffmpeg protobuf-compiler ccache - name: Install CUDA Dependencies run: | curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb @@ -86,7 +86,7 @@ jobs: cache: false - name: Dependencies run: | - sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler + sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - name: Build stablediffusion diff --git a/Dockerfile b/Dockerfile index 9680ba5c..9cde257c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,7 @@ ARG GO_TAGS="stablediffusion tinydream tts" RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ + ccache \ ca-certificates \ cmake \ curl \ diff --git a/Makefile b/Makefile index a42995f2..bd5c97f3 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8 +CPPLLAMA_VERSION?=4f0263633b40e94e8b69fd6e7e4395cfedfd5c12 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all @@ -158,6 +158,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2 
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml +ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc +ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper @@ -314,7 +316,7 @@ build: prepare backend-assets grpcs ## Build the project CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./ build-minimal: - BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build + BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build build-api: BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build @@ -691,6 +693,17 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda +backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc + cp -rf backend/cpp/llama backend/cpp/llama-grpc + $(MAKE) -C backend/cpp/llama-grpc purge + $(info ${GREEN}I llama-cpp build info:grpc${RESET}) + CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc + +backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc + mkdir -p backend-assets/util/ + cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server + backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \ $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/ diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index f9673b33..fb1e1388 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2217,6 +2217,12 @@ static void params_parse(const backend::ModelOptions* request, } else { params.n_parallel = 1; } + + const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS"); + if (llama_grpc_servers != NULL) { + params.rpc_servers = std::string(llama_grpc_servers); + } + // TODO: Add yarn if (!request->tensorsplit().empty()) { diff --git a/core/cli/cli.go b/core/cli/cli.go index 2f2dcd8b..71f877b8 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -13,8 +13,9 @@ type Context struct { var CLI struct { Context `embed:""` - Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"` - Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"` - TTS TTSCMD `cmd:"" help:"Convert text to speech"` - Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` + Run RunCMD `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. 
Run 'local-ai run --help' for more information" default:"withargs"` + Models ModelsCMD `cmd:"" help:"Manage LocalAI models and definitions"` + TTS TTSCMD `cmd:"" help:"Convert text to speech"` + Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` + LLAMACPPWorker LLAMACPPWorkerCMD `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"` } diff --git a/core/cli/llamacppworker.go b/core/cli/llamacppworker.go new file mode 100644 index 00000000..832b5bde --- /dev/null +++ b/core/cli/llamacppworker.go @@ -0,0 +1,37 @@ +package cli + +import ( + "os" + "syscall" + + "github.com/go-skynet/LocalAI/pkg/assets" + "github.com/rs/zerolog/log" +) + +type LLAMACPPWorkerCMD struct { + Args []string `arg:"" optional:"" name:"models" help:"Worker arguments: host port"` + BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` +} + +func (r *LLAMACPPWorkerCMD) Run(ctx *Context) error { + // Extract files from the embedded FS + err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) + log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) + if err != nil { + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + } + + return syscall.Exec( + assets.ResolvePath( + r.BackendAssetsPath, + "util", + "llama-cpp-rpc-server", + ), + append([]string{ + assets.ResolvePath( + r.BackendAssetsPath, + "util", + "llama-cpp-rpc-server", + )}, r.Args...), + os.Environ()) +} diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 41c792fb..6b9aa54e 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -93,6 +93,8 @@ type Diffusers struct { ControlNet string `yaml:"control_net"` } +// LLMConfig is a struct that holds the configuration that are +// generic for most of the LLM backends. type LLMConfig struct { SystemPrompt string `yaml:"system_prompt"` TensorSplit string `yaml:"tensor_split"` @@ -144,6 +146,7 @@ type LLMConfig struct { YarnBetaSlow float32 `yaml:"yarn_beta_slow"` } +// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend type AutoGPTQ struct { ModelBaseName string `yaml:"model_base_name"` Device string `yaml:"device"` @@ -151,13 +154,31 @@ type AutoGPTQ struct { UseFastTokenizer bool `yaml:"use_fast_tokenizer"` } +// TemplateConfig is a struct that holds the configuration of the templating system type TemplateConfig struct { - Chat string `yaml:"chat"` - ChatMessage string `yaml:"chat_message"` - Completion string `yaml:"completion"` - Edit string `yaml:"edit"` - Functions string `yaml:"function"` - UseTokenizerTemplate bool `yaml:"use_tokenizer_template"` + // Chat is the template used in the chat completion endpoint + Chat string `yaml:"chat"` + + // ChatMessage is the template used for chat messages + ChatMessage string `yaml:"chat_message"` + + // Completion is the template used for completion requests + Completion string `yaml:"completion"` + + // Edit is the template used for edit completion requests + Edit string `yaml:"edit"` + + // Functions is the template used when tools are present in the client requests + Functions string `yaml:"function"` + + // UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used. 
+ // Note: this is mostly consumed for backends such as vllm and transformers + // that can use the tokenizers specified in the JSON config files of the models + UseTokenizerTemplate bool `yaml:"use_tokenizer_template"` + + // JoinChatMessagesByCharacter is a string that will be used to join chat messages together. + // It defaults to \n + JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"` } func (c *BackendConfig) SetFunctionCallString(s string) { diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index ccbf0946..c49ef263 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -349,7 +349,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup mess = append(mess, content) } - predInput = strings.Join(mess, "\n") + joinCharacter := "\n" + if config.TemplateConfig.JoinChatMessagesByCharacter != nil { + joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter + } + + predInput = strings.Join(mess, joinCharacter) log.Debug().Msgf("Prompt (before templating): %s", predInput) templateFile := "" diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go index b795cb30..8f668a1a 100644 --- a/pkg/assets/extract.go +++ b/pkg/assets/extract.go @@ -8,6 +8,10 @@ import ( "path/filepath" ) +func ResolvePath(dir string, paths ...string) string { + return filepath.Join(append([]string{dir, "backend-assets"}, paths...)...) +} + func ExtractFiles(content embed.FS, extractDir string) error { // Create the target directory if it doesn't exist err := os.MkdirAll(extractDir, 0750) @@ -39,7 +43,7 @@ func ExtractFiles(content embed.FS, extractDir string) error { } // Create the file in the target directory - err = os.WriteFile(targetFile, fileData, 0600) + err = os.WriteFile(targetFile, fileData, 0700) if err != nil { return fmt.Errorf("failed to write file: %v", err) } diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 115a12a0..d013740c 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -12,9 +12,9 @@ import ( grpc "github.com/go-skynet/LocalAI/pkg/grpc" "github.com/go-skynet/LocalAI/pkg/xsysinfo" + "github.com/klauspost/cpuid/v2" "github.com/phayes/freeport" "github.com/rs/zerolog/log" - "golang.org/x/sys/cpu" "github.com/elliotchance/orderedmap/v2" ) @@ -26,16 +26,18 @@ var Aliases map[string]string = map[string]string{ "langchain-huggingface": LCHuggingFaceBackend, } +var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true" + const ( LlamaGGML = "llama-ggml" - LLamaCPP = "llama-cpp" + LLamaCPP = "llama-cpp" - LLamaCPPCUDA12 = "llama-cpp-cuda12" LLamaCPPAVX2 = "llama-cpp-avx2" LLamaCPPAVX = "llama-cpp-avx" LLamaCPPFallback = "llama-cpp-fallback" LLamaCPPCUDA = "llama-cpp-cuda" + LLamaCPPGRPC = "llama-cpp-grpc" Gpt4AllLlamaBackend = "gpt4all-llama" Gpt4AllMptBackend = "gpt4all-mpt" @@ -59,7 +61,7 @@ func backendPath(assetDir, backend string) string { // backendsInAssetDir returns the list of backends in the asset directory // that should be loaded -func backendsInAssetDir(assetDir string) (*orderedmap.OrderedMap[string, any], error) { +func backendsInAssetDir(assetDir string) ([]string, error) { // Exclude backends from automatic loading excludeBackends := []string{LocalStoreBackend} entry, err := os.ReadDir(backendPath(assetDir, "")) @@ -74,27 +76,46 @@ ENTRY: continue ENTRY } } - if !e.IsDir() { - if !strings.Contains(e.Name(), LLamaCPP) || strings.Contains(e.Name(), LLamaCPPFallback) { - backends[e.Name()] = []string{} - } + 
if e.IsDir() { + continue } + + // Skip the llama.cpp variants if we are autoDetecting + // But we always load the fallback variant if it exists + if strings.Contains(e.Name(), LLamaCPP) && !strings.Contains(e.Name(), LLamaCPPFallback) && autoDetect { + continue + } + + backends[e.Name()] = []string{} } - foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback := false, false, false - if _, ok := backends[LLamaCPP]; !ok { - for _, e := range entry { - if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2) - foundLCPPAVX2 = true - } - if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX) - foundLCPPAVX = true - } - if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback { - backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback) - foundLCPPFallback = true + // if we are autoDetecting, we want to show the llama.cpp variants as a single backend + if autoDetect { + // if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up + // when starting the service + foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false + if _, ok := backends[LLamaCPP]; !ok { + for _, e := range entry { + if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2) + foundLCPPAVX2 = true + } + if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX) + foundLCPPAVX = true + } + if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback) + foundLCPPFallback = true + } + if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC) + foundLCPPGRPC = true + } + if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda { + backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA) + foundLCPPCuda = true + } } } } @@ -102,10 +123,13 @@ ENTRY: // order backends from the asset directory. // as we scan for backends, we want to keep some order which backends are tried of. // for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last. - // sets a priority list - // First has more priority + + // sets a priority list - first has more priority priorityList := []string{ - // First llama.cpp and llama-ggml + + // First llama.cpp(variants) and llama-ggml to follow. + // We keep the fallback to prevent that if the llama.cpp variants + // that depends on shared libs if breaks have still a safety net. 
LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback, } @@ -139,7 +163,57 @@ ENTRY: } } - return orderedBackends, nil + return orderedBackends.Keys(), nil +} + +// selectGRPCProcess selects the GRPC process to start based on system capabilities +func selectGRPCProcess(backend, assetDir string) string { + foundCUDA := false + var grpcProcess string + + // Select backend now just for llama.cpp + if backend != LLamaCPP { + return "" + } + + // Note: This environment variable is read by the LocalAI's llama.cpp grpc-server + if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" { + log.Info().Msgf("[%s] attempting to load with GRPC variant", LLamaCPPGRPC) + return backendPath(assetDir, LLamaCPPGRPC) + } + + gpus, err := xsysinfo.GPUs() + if err == nil { + for _, gpu := range gpus { + if strings.Contains(gpu.String(), "nvidia") { + p := backendPath(assetDir, LLamaCPPCUDA) + if _, err := os.Stat(p); err == nil { + log.Info().Msgf("[%s] attempting to load with CUDA variant", backend) + grpcProcess = p + foundCUDA = true + } else { + log.Info().Msgf("GPU device found but no CUDA backend present") + } + } + } + } + + if foundCUDA { + return grpcProcess + } + + if xsysinfo.HasCPUCaps(cpuid.AVX2) { + log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) + grpcProcess = backendPath(assetDir, LLamaCPPAVX2) + } else if xsysinfo.HasCPUCaps(cpuid.AVX) { + log.Info().Msgf("[%s] attempting to load with AVX variant", backend) + grpcProcess = backendPath(assetDir, LLamaCPPAVX) + } else { + log.Info().Msgf("[%s] attempting to load with fallback variant", backend) + grpcProcess = backendPath(assetDir, LLamaCPPFallback) + } + + return grpcProcess } // starts the grpcModelProcess for the backend, and returns a grpc client @@ -192,33 +266,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string } else { grpcProcess := backendPath(o.assetDir, backend) - foundCUDA := false - // for llama-cpp, check CPU capabilities and load the appropriate variant - if backend == LLamaCPP { - gpus, err := xsysinfo.GPUs() - if err == nil { - for _, gpu := range gpus { - if strings.Contains(gpu.String(), "nvidia") { - log.Info().Msgf("[%s] attempting to load with CUDA variant", backend) - grpcProcess = backendPath(o.assetDir, LLamaCPPCUDA) - if _, err := os.Stat(grpcProcess); err == nil { - foundCUDA = true - } - } - } - } - - if !foundCUDA { - if cpu.X86.HasAVX2 { - log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend) - grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2) - } else if cpu.X86.HasAVX { - log.Info().Msgf("[%s] attempting to load with AVX variant", backend) - grpcProcess = backendPath(o.assetDir, LLamaCPPAVX) - } else { - log.Info().Msgf("[%s] attempting to load with fallback variant", backend) - grpcProcess = backendPath(o.assetDir, LLamaCPPFallback) - } + if autoDetect { + // autoDetect GRPC process to start based on system capabilities + if selectedProcess := selectGRPCProcess(backend, o.assetDir); selectedProcess != "" { + grpcProcess = selectedProcess } } @@ -363,28 +414,24 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { var err error - // autoload also external backends - allBackendsToAutoLoad := orderedmap.NewOrderedMap[string, any]() + // get backends embedded in the binary autoLoadBackends, err := backendsInAssetDir(o.assetDir) if err != nil { return nil, err } + + // append externalBackends supplied by the user via the CLI + for _, b := range o.externalBackends { + autoLoadBackends = append(autoLoadBackends, b) + } + 
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends) - for _, k := range autoLoadBackends.Keys() { - v, _ := autoLoadBackends.Get(k) - allBackendsToAutoLoad.Set(k, v) - } - - for _, b := range o.externalBackends { - allBackendsToAutoLoad.Set(b, []string{}) - } - if o.model != "" { - log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.model, allBackendsToAutoLoad.Keys()) + log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.model, autoLoadBackends) } - for _, key := range allBackendsToAutoLoad.Keys() { + for _, key := range autoLoadBackends { log.Info().Msgf("[%s] Attempting to load", key) options := []Option{ WithBackendString(key),