mirror of
https://github.com/mudler/LocalAI.git
synced 2025-01-18 02:40:01 +00:00
fix(llama.cpp): set better defaults for llama.cpp (#1961)
fix(defaults): set better defaults for llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
8aa5f5a660
commit
8342553214
@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
|
|||||||
MMap: *c.MMap,
|
MMap: *c.MMap,
|
||||||
MainGPU: c.MainGPU,
|
MainGPU: c.MainGPU,
|
||||||
TensorSplit: c.TensorSplit,
|
TensorSplit: c.TensorSplit,
|
||||||
TailFreeSamplingZ: float32(c.TFZ),
|
TailFreeSamplingZ: float32(*c.TFZ),
|
||||||
TypicalP: float32(c.TypicalP),
|
TypicalP: float32(*c.TypicalP),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
threads := lo.threads
|
threads := lo.threads
|
||||||
f16 := lo.f16
|
f16 := lo.f16
|
||||||
debug := lo.debug
|
debug := lo.debug
|
||||||
defaultTopP := 0.7
|
// https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
|
||||||
defaultTopK := 80
|
defaultTopP := 0.95
|
||||||
|
defaultTopK := 40
|
||||||
defaultTemp := 0.9
|
defaultTemp := 0.9
|
||||||
defaultMaxTokens := 2048
|
defaultMaxTokens := 2048
|
||||||
defaultMirostat := 2
|
defaultMirostat := 2
|
||||||
defaultMirostatTAU := 5.0
|
defaultMirostatTAU := 5.0
|
||||||
defaultMirostatETA := 0.1
|
defaultMirostatETA := 0.1
|
||||||
|
defaultTypicalP := 1.0
|
||||||
|
defaultTFZ := 1.0
|
||||||
|
|
||||||
// Try to offload all GPU layers (if GPU is found)
|
// Try to offload all GPU layers (if GPU is found)
|
||||||
defaultNGPULayers := 99999999
|
defaultNGPULayers := 99999999
|
||||||
@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
|
|||||||
cfg.TopK = &defaultTopK
|
cfg.TopK = &defaultTopK
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cfg.TypicalP == nil {
|
||||||
|
cfg.TypicalP = &defaultTypicalP
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.TFZ == nil {
|
||||||
|
cfg.TFZ = &defaultTFZ
|
||||||
|
}
|
||||||
|
|
||||||
if cfg.MMap == nil {
|
if cfg.MMap == nil {
|
||||||
// MMap is enabled by default
|
// MMap is enabled by default
|
||||||
cfg.MMap = &trueV
|
cfg.MMap = &trueV
|
||||||
|
@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
config.Seed = input.Seed
|
config.Seed = input.Seed
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.TypicalP != 0 {
|
if input.TypicalP != nil {
|
||||||
config.TypicalP = input.TypicalP
|
config.TypicalP = input.TypicalP
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,9 +26,9 @@ type PredictionOptions struct {
|
|||||||
|
|
||||||
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
|
||||||
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
|
PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
|
||||||
TFZ float64 `json:"tfz" yaml:"tfz"`
|
TFZ *float64 `json:"tfz" yaml:"tfz"`
|
||||||
|
|
||||||
TypicalP float64 `json:"typical_p" yaml:"typical_p"`
|
TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
|
||||||
Seed *int `json:"seed" yaml:"seed"`
|
Seed *int `json:"seed" yaml:"seed"`
|
||||||
|
|
||||||
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
|
NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
|
||||||
|
Loading…
Reference in New Issue
Block a user