diff --git a/core/backend/options.go b/core/backend/options.go
index 143a9332..5b303b05 100644
--- a/core/backend/options.go
+++ b/core/backend/options.go
@@ -144,7 +144,7 @@ func gRPCPredictOpts(c config.BackendConfig, modelPath string) *pb.PredictOption
 		MMap:                *c.MMap,
 		MainGPU:             c.MainGPU,
 		TensorSplit:         c.TensorSplit,
-		TailFreeSamplingZ:   float32(c.TFZ),
-		TypicalP:            float32(c.TypicalP),
+		TailFreeSamplingZ:   float32(*c.TFZ),
+		TypicalP:            float32(*c.TypicalP),
 	}
 }
diff --git a/core/config/backend_config.go b/core/config/backend_config.go
index 25edd343..a90b1c1b 100644
--- a/core/config/backend_config.go
+++ b/core/config/backend_config.go
@@ -205,13 +205,16 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	threads := lo.threads
 	f16 := lo.f16
 	debug := lo.debug
-	defaultTopP := 0.7
-	defaultTopK := 80
+	// Sampling defaults below are aligned with llama.cpp; see https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
+	defaultTopP := 0.95
+	defaultTopK := 40
 	defaultTemp := 0.9
 	defaultMaxTokens := 2048
 	defaultMirostat := 2
 	defaultMirostatTAU := 5.0
 	defaultMirostatETA := 0.1
+	defaultTypicalP := 1.0
+	defaultTFZ := 1.0
 
 	// Try to offload all GPU layers (if GPU is found)
 	defaultNGPULayers := 99999999
@@ -229,6 +232,14 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.TopK = &defaultTopK
 	}
 
+	if cfg.TypicalP == nil {
+		cfg.TypicalP = &defaultTypicalP
+	}
+
+	if cfg.TFZ == nil {
+		cfg.TFZ = &defaultTFZ
+	}
+
 	if cfg.MMap == nil {
 		// MMap is enabled by default
 		cfg.MMap = &trueV
diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go
index c9981204..369fb0b8 100644
--- a/core/http/endpoints/openai/request.go
+++ b/core/http/endpoints/openai/request.go
@@ -192,11 +192,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 		config.RepeatPenalty = input.RepeatPenalty
 	}
 
-	if input.FrequencyPenalty!= 0 {
+	if input.FrequencyPenalty != 0 {
 		config.FrequencyPenalty = input.FrequencyPenalty
 	}
 
-	if input.PresencePenalty!= 0 {
+	if input.PresencePenalty != 0 {
 		config.PresencePenalty = input.PresencePenalty
 	}
 
@@ -216,7 +216,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
 		config.Seed = input.Seed
 	}
 
-	if input.TypicalP != 0 {
+	if input.TypicalP != nil {
 		config.TypicalP = input.TypicalP
 	}
 
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index 4933f2d2..7e509167 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -24,12 +24,12 @@ type PredictionOptions struct {
 	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
 	Keep          int     `json:"n_keep" yaml:"n_keep"`
 
-	FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
-	PresencePenalty  float64 `json:"presence_penalty" yaml:"presence_penalty"`
-	TFZ              float64 `json:"tfz" yaml:"tfz"`
+	FrequencyPenalty float64  `json:"frequency_penalty" yaml:"frequency_penalty"`
+	PresencePenalty  float64  `json:"presence_penalty" yaml:"presence_penalty"`
+	TFZ              *float64 `json:"tfz" yaml:"tfz"`
 
-	TypicalP float64 `json:"typical_p" yaml:"typical_p"`
-	Seed     *int    `json:"seed" yaml:"seed"`
+	TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
+	Seed     *int     `json:"seed" yaml:"seed"`
 
 	NegativePrompt      string  `json:"negative_prompt" yaml:"negative_prompt"`
 	RopeFreqBase        float32 `json:"rope_freq_base" yaml:"rope_freq_base"`