feat: update llama, enable NUMA (#684)

This commit is contained in:
Ettore Di Giacinto 2023-06-27 09:00:10 +02:00 committed by GitHub
parent e130b208ab
commit 3593cb0c87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 6 additions and 1 deletion

View File

@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=9470597ae75ad8b5f17cfab73805ee4a6685d199
GOLLAMA_VERSION?=f104111358e8098aea69ce408b85b707528179ef
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=3417a37c5472fb5111a7bd0ed747c8df749c595e
GOGGMLTRANSFORMERS_VERSION?=a459d2726792132541152c981ed9fbfe28f4fd20

View File

@ -23,6 +23,7 @@ type Config struct {
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
F16 bool `yaml:"f16"`
NUMA bool `yaml:"numa"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`

View File

@ -48,6 +48,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
llamaOpts = append(llamaOpts, llama.SetNBatch(512))
}
if c.NUMA {
llamaOpts = append(llamaOpts, llama.EnableNUMA)
}
if c.LowVRAM {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}