From acd03d15f211c517f176f893c10de74f2b61eda5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Tue, 16 May 2023 16:26:25 +0200
Subject: [PATCH] feat: add support for cublas/openblas in the llama.cpp
 backend (#258)

---
 Makefile          | 27 +++++++++++++--------------
 api/config.go     | 34 +++++++++++++++++-----------------
 api/prediction.go |  4 ++++
 3 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/Makefile b/Makefile
index 683b5d32..f296a836 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=eb99b5438787cbd687682da445e879e02bfeaa07
+GOLLAMA_VERSION?=7f9ae4246088f0c08ed322acbae21d69cd2eb547
 GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
 GPT4ALL_VERSION?=a330bfe26e9e35ca402e16df18973a3b162fb4db
 GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
@@ -12,7 +12,9 @@ RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
 WHISPER_CPP_VERSION?=a5defbc1b98bea0f070331ce1e8b62d947b0443d
 BERT_VERSION?=33118e0da50318101408986b86a331daeb4a6658
 BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
-
+BUILD_TYPE?=
+CGO_LDFLAGS?=
+CUDA_LIBPATH?=/usr/local/cuda/lib64/
 
 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
@@ -23,15 +25,12 @@ RESET  := $(shell tput -Txterm sgr0)
 C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
 LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
 
-# Use this if you want to set the default behavior
-ifndef BUILD_TYPE
-	BUILD_TYPE:=default
+ifeq ($(BUILD_TYPE),openblas) # OpenBLAS build: append -lopenblas to CGO_LDFLAGS
+	CGO_LDFLAGS+=-lopenblas
 endif
 
-ifeq ($(BUILD_TYPE), "generic")
-	GENERIC_PREFIX:=generic-
-else
-	GENERIC_PREFIX:=
+ifeq ($(BUILD_TYPE),cublas) # cuBLAS build: append -lcublas/-lcudart and a -L search path taken from CUDA_LIBPATH
+	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
 endif
 
 .PHONY: all test build vendor
@@ -94,7 +93,7 @@ go-bert/libgobert.a: go-bert
 	$(MAKE) -C go-bert libgobert.a
 
 gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
-	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## CEREBRAS GPT
 go-gpt2: 
@@ -113,7 +112,7 @@ go-gpt2:
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
 
 go-gpt2/libgpt2.a: go-gpt2
-	$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
+	$(MAKE) -C go-gpt2 libgpt2.a
 
 whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git
@@ -130,7 +129,7 @@ go-llama:
 	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
 
 go-llama/libbinding.a: go-llama 
-	$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
+	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
 
 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
@@ -171,14 +170,14 @@ clean: ## Remove build related file
 build: prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
-	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
 
 generic-build: ## Build the project using generic
 	BUILD_TYPE="generic" $(MAKE) build
 
 ## Run
 run: prepare ## run local-ai
-	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
 
 test-models/testmodel:
 	mkdir test-models
diff --git a/api/config.go b/api/config.go
index 43051286..3791d490 100644
--- a/api/config.go
+++ b/api/config.go
@@ -15,23 +15,23 @@ import (
 )
 
 type Config struct {
-	OpenAIRequest  `yaml:"parameters"`
-	Name           string            `yaml:"name"`
-	StopWords      []string          `yaml:"stopwords"`
-	Cutstrings     []string          `yaml:"cutstrings"`
-	TrimSpace      []string          `yaml:"trimspace"`
-	ContextSize    int               `yaml:"context_size"`
-	F16            bool              `yaml:"f16"`
-	Threads        int               `yaml:"threads"`
-	Debug          bool              `yaml:"debug"`
-	Roles          map[string]string `yaml:"roles"`
-	Embeddings     bool              `yaml:"embeddings"`
-	Backend        string            `yaml:"backend"`
-	TemplateConfig TemplateConfig    `yaml:"template"`
-	MirostatETA    float64           `yaml:"mirostat_eta"`
-	MirostatTAU    float64           `yaml:"mirostat_tau"`
-	Mirostat       int               `yaml:"mirostat"`
-
+	OpenAIRequest               `yaml:"parameters"`
+	Name                        string            `yaml:"name"`
+	StopWords                   []string          `yaml:"stopwords"`
+	Cutstrings                  []string          `yaml:"cutstrings"`
+	TrimSpace                   []string          `yaml:"trimspace"`
+	ContextSize                 int               `yaml:"context_size"`
+	F16                         bool              `yaml:"f16"`
+	Threads                     int               `yaml:"threads"`
+	Debug                       bool              `yaml:"debug"`
+	Roles                       map[string]string `yaml:"roles"`
+	Embeddings                  bool              `yaml:"embeddings"`
+	Backend                     string            `yaml:"backend"`
+	TemplateConfig              TemplateConfig    `yaml:"template"`
+	MirostatETA                 float64           `yaml:"mirostat_eta"`
+	MirostatTAU                 float64           `yaml:"mirostat_tau"`
+	Mirostat                    int               `yaml:"mirostat"`
+	NGPULayers                  int               `yaml:"gpu_layers"` // read from the "gpu_layers" YAML key; when non-zero it is passed to llama.SetGPULayers
 	PromptStrings, InputStrings []string
 	InputToken                  [][]int
 }
diff --git a/api/prediction.go b/api/prediction.go
index 3dfb45fd..7aa839bb 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -31,6 +31,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
 	}
 
+	if c.NGPULayers != 0 {
+		llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
+	}
+
 	return llamaOpts
 }