From a8bfb6f9c2dfb36b4fa88e6e435ffddcbd1ddfaf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Wed, 26 Jun 2024 14:58:50 +0200
Subject: [PATCH] feat(options): add `repeat_last_n` (#2660)

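feat(options): add repeat_last_n

Add a `repeat_last_n` prediction option (JSON/YAML key `repeat_last_n`)
and pass it to the backend as the gRPC `Repeat` field. Previously
`Repeat` was filled from `repeat_penalty` converted to an integer.
`repeat_penalty` is now forwarded as `Penalty`, and `presence_penalty`
is forwarded as `PresencePenalty`.

The llama gRPC server additionally logs the sampling parameters of the
slot (via `llama_sampling_print`).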

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/cpp/llama/grpc-server.cpp | 2 ++
 core/backend/options.go           | 6 ++++--
 core/schema/prediction.go         | 5 ++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
index ccf083c2..aa9a9497 100644
--- a/backend/cpp/llama/grpc-server.cpp
+++ b/backend/cpp/llama/grpc-server.cpp
@@ -886,6 +886,8 @@ struct llama_server_context
             {"task_id", slot->task_id},
         });
 
+        LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
+
         return true;
     }
 
diff --git a/core/backend/options.go b/core/backend/options.go
index 96b176c9..e6ce87eb 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -142,12 +142,14 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 		MirostatTAU:         float32(*c.LLMConfig.MirostatTAU),
 		Debug:               *c.Debug,
 		StopPrompts:         c.StopWords,
-		Repeat:              int32(c.RepeatPenalty),
+		Repeat:              int32(c.RepeatLastN),
+		FrequencyPenalty:    float32(c.FrequencyPenalty),
+		PresencePenalty:     float32(c.PresencePenalty),
+		Penalty:             float32(c.RepeatPenalty),
 		NKeep:               int32(c.Keep),
 		Batch:               int32(c.Batch),
 		IgnoreEOS:           c.IgnoreEOS,
 		Seed:                getSeed(c),
-		FrequencyPenalty:    float32(c.FrequencyPenalty),
 		MLock:               *c.MMlock,
 		MMap:                *c.MMap,
 		MainGPU:             c.MainGPU,
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index 8ad56928..18d2782b 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -25,7 +25,10 @@ type PredictionOptions struct {
 	Batch         int     `json:"batch" yaml:"batch"`
 	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
 	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
-	Keep          int     `json:"n_keep" yaml:"n_keep"`
+
+	RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
+
+	Keep int `json:"n_keep" yaml:"n_keep"`
 
 	FrequencyPenalty float64  `json:"frequency_penalty" yaml:"frequency_penalty"`
 	PresencePenalty  float64  `json:"presence_penalty" yaml:"presence_penalty"`