fix(llama.cpp): set better defaults for llama.cpp (#1961)

fix(defaults): set better defaults for llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-04-06 22:56:45 +02:00 committed by GitHub
parent 8aa5f5a660
commit 8342553214
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 23 additions and 12 deletions

View File

@@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
MMap: *c.MMap, MMap: *c.MMap,
MainGPU: c.MainGPU, MainGPU: c.MainGPU,
TensorSplit: c.TensorSplit, TensorSplit: c.TensorSplit,
TailFreeSamplingZ: float32(c.TFZ), TailFreeSamplingZ: float32(*c.TFZ),
TypicalP: float32(c.TypicalP), TypicalP: float32(*c.TypicalP),
} }
} }

View File

@@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
threads := lo.threads threads := lo.threads
f16 := lo.f16 f16 := lo.f16
debug := lo.debug debug := lo.debug
defaultTopP := 0.7 // https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
defaultTopK := 80 defaultTopP := 0.95
defaultTopK := 40
defaultTemp := 0.9 defaultTemp := 0.9
defaultMaxTokens := 2048 defaultMaxTokens := 2048
defaultMirostat := 2 defaultMirostat := 2
defaultMirostatTAU := 5.0 defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1 defaultMirostatETA := 0.1
defaultTypicalP := 1.0
defaultTFZ := 1.0
// Try to offload all GPU layers (if GPU is found) // Try to offload all GPU layers (if GPU is found)
defaultNGPULayers := 99999999 defaultNGPULayers := 99999999
@@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.TopK = &defaultTopK cfg.TopK = &defaultTopK
} }
if cfg.TypicalP == nil {
cfg.TypicalP = &defaultTypicalP
}
if cfg.TFZ == nil {
cfg.TFZ = &defaultTFZ
}
if cfg.MMap == nil { if cfg.MMap == nil {
// MMap is enabled by default // MMap is enabled by default
cfg.MMap = &trueV cfg.MMap = &trueV

View File

@@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
config.Seed = input.Seed config.Seed = input.Seed
} }
if input.TypicalP != 0 { if input.TypicalP != nil {
config.TypicalP = input.TypicalP config.TypicalP = input.TypicalP
} }

View File

@@ -26,9 +26,9 @@ type PredictionOptions struct {
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"` PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
TFZ float64 `json:"tfz" yaml:"tfz"` TFZ *float64 `json:"tfz" yaml:"tfz"`
TypicalP float64 `json:"typical_p" yaml:"typical_p"` TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
Seed *int `json:"seed" yaml:"seed"` Seed *int `json:"seed" yaml:"seed"`
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`