From 180cd4ccda0753ef1afb2eb07857ec0534ea3366 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 21 Apr 2024 16:34:00 +0200
Subject: [PATCH] fix(llama.cpp-ggml): fixup `max_tokens` for old backend
 (#2094)

fix(llama.cpp-ggml): set 0 as default for `max_tokens`

Signed-off-by: Ettore Di Giacinto
---
 core/config/backend_config.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 6ca24afa..dfc216dc 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -210,7 +210,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	defaultMirostatETA := 0.1
 	defaultTypicalP := 1.0
 	defaultTFZ := 1.0
-	defaultInfinity := -1
+	defaultZero := 0
 
 	// Try to offload all GPU layers (if GPU is found)
 	defaultHigh := 99999999
@@ -254,7 +254,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	}
 
 	if cfg.Maxtokens == nil {
-		cfg.Maxtokens = &defaultZero
 	}
 
 	if cfg.Mirostat == nil {
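
Note: the sketch below is a minimal, self-contained illustration of the
nil-pointer defaulting pattern that this patch changes; it is not the actual
LocalAI source. BackendConfig here is reduced to the single Maxtokens field,
and the ConfigLoaderOption machinery from core/config/backend_config.go is
omitted.

package main

import "fmt"

// Simplified stand-in for LocalAI's BackendConfig. Optional settings are
// pointers so that nil can mean "not set by the user".
type BackendConfig struct {
	Maxtokens *int
}

// SetDefaults mirrors the pattern in the patch: a default is applied only
// when the user left the field nil. After this patch, an unset max_tokens
// defaults to 0 rather than -1, which the old llama.cpp-ggml backend did
// not accept.
func (cfg *BackendConfig) SetDefaults() {
	defaultZero := 0
	if cfg.Maxtokens == nil {
		cfg.Maxtokens = &defaultZero
	}
}

func main() {
	cfg := &BackendConfig{}
	cfg.SetDefaults()
	fmt.Println(*cfg.Maxtokens) // prints 0
}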