fix(llama.cpp-ggml): fixup max_tokens for old backend (#2094)

fix(llama.cpp-ggml): set 0 as default for `max_tokens`

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-11 19:51:43 +00:00 · 2024-04-21 16:34:00 +02:00
parent 284ad026b1
commit 180cd4ccda
1 changed file with 2 additions and 2 deletions
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	defaultMirostatETA := 0.1
 	defaultTypicalP := 1.0
 	defaultTFZ := 1.0
-	defaultInfinity := -1
+	defaultZero := 0
 	// Try to offload all GPU layers (if GPU is found)
 	defaultHigh := 99999999
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	}
 	if cfg.Maxtokens == nil {
-		cfg.Maxtokens = &defaultInfinity
+		cfg.Maxtokens = &defaultZero
 	}
 	if cfg.Mirostat == nil {