diff --git a/core/backend/options.go b/core/backend/options.go index 143a9332..5b303b05 100644 --- a/core/backend/options.go +++ b/core/backend/options.go @@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption MMap: *c.MMap, MainGPU: c.MainGPU, TensorSplit: c.TensorSplit, - TailFreeSamplingZ: float32(c.TFZ), - TypicalP: float32(c.TypicalP), + TailFreeSamplingZ: float32(*c.TFZ), + TypicalP: float32(*c.TypicalP), } } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 25edd343..a90b1c1b 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { threads := lo.threads f16 := lo.f16 debug := lo.debug - defaultTopP := 0.7 - defaultTopK := 80 + // https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22 + defaultTopP := 0.95 + defaultTopK := 40 defaultTemp := 0.9 defaultMaxTokens := 2048 defaultMirostat := 2 defaultMirostatTAU := 5.0 defaultMirostatETA := 0.1 + defaultTypicalP := 1.0 + defaultTFZ := 1.0 // Try to offload all GPU layers (if GPU is found) defaultNGPULayers := 99999999 @@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) { cfg.TopK = &defaultTopK } + if cfg.TypicalP == nil { + cfg.TypicalP = &defaultTypicalP + } + + if cfg.TFZ == nil { + cfg.TFZ = &defaultTFZ + } + if cfg.MMap == nil { // MMap is enabled by default cfg.MMap = &trueV diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index c9981204..369fb0b8 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -192,11 +192,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.RepeatPenalty = input.RepeatPenalty } - if input.FrequencyPenalty!= 0 { + if input.FrequencyPenalty != 0 { config.FrequencyPenalty = input.FrequencyPenalty } - if input.PresencePenalty!= 0 { + if input.PresencePenalty != 0 { config.PresencePenalty = input.PresencePenalty } @@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.Seed = input.Seed } - if input.TypicalP != 0 { + if input.TypicalP != nil { config.TypicalP = input.TypicalP } diff --git a/core/schema/prediction.go b/core/schema/prediction.go index 4933f2d2..7e509167 100644 --- a/core/schema/prediction.go +++ b/core/schema/prediction.go @@ -24,12 +24,12 @@ type PredictionOptions struct { RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` Keep int `json:"n_keep" yaml:"n_keep"` - FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` - PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"` - TFZ float64 `json:"tfz" yaml:"tfz"` + FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` + PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"` + TFZ *float64 `json:"tfz" yaml:"tfz"` - TypicalP float64 `json:"typical_p" yaml:"typical_p"` - Seed *int `json:"seed" yaml:"seed"` + TypicalP *float64 `json:"typical_p" yaml:"typical_p"` + Seed *int `json:"seed" yaml:"seed"` NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`