From a8bfb6f9c2dfb36b4fa88e6e435ffddcbd1ddfaf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto <mudler@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:58:50 +0200 Subject: [PATCH] feat(options): add `repeat_last_n` (#2660) feat(options): add repeat_last_n Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --- backend/cpp/llama/grpc-server.cpp | 2 ++ core/backend/options.go | 6 ++++-- core/schema/prediction.go | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index ccf083c2..aa9a9497 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -886,6 +886,8 @@ struct llama_server_context {"task_id", slot->task_id}, }); + LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str()); + return true; } diff --git a/core/backend/options.go b/core/backend/options.go index 96b176c9..e6ce87eb 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -142,12 +142,14 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption MirostatTAU: float32(*c.LLMConfig.MirostatTAU), Debug: *c.Debug, StopPrompts: c.StopWords, - Repeat: int32(c.RepeatPenalty), + Repeat: int32(c.RepeatLastN), + FrequencyPenalty: float32(c.FrequencyPenalty), + PresencePenalty: float32(c.PresencePenalty), + Penalty: float32(c.RepeatPenalty), NKeep: int32(c.Keep), Batch: int32(c.Batch), IgnoreEOS: c.IgnoreEOS, Seed: getSeed(c), - FrequencyPenalty: float32(c.FrequencyPenalty), MLock: *c.MMlock, MMap: *c.MMap, MainGPU: c.MainGPU, diff --git a/core/schema/prediction.go b/core/schema/prediction.go index 8ad56928..18d2782b 100644 --- a/core/schema/prediction.go +++ b/core/schema/prediction.go @@ -25,7 +25,10 @@ type PredictionOptions struct { Batch int `json:"batch" yaml:"batch"` IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"` RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` - Keep int `json:"n_keep" yaml:"n_keep"` + + RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"` + + Keep int `json:"n_keep" yaml:"n_keep"` FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`