mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
make use of new bindings for gpt4all (#232)
This commit is contained in:
parent
032dee256f
commit
59e3c02002
3
.github/workflows/bump_deps.yaml
vendored
3
.github/workflows/bump_deps.yaml
vendored
@ -30,6 +30,9 @@ jobs:
|
||||
- repository: "go-skynet/bloomz.cpp"
|
||||
variable: "BLOOMZ_VERSION"
|
||||
branch: "main"
|
||||
- repository: "go-skynet/gpt4all"
|
||||
variable: "GPT4ALL_VERSION"
|
||||
branch: "main"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
54
Makefile
54
Makefile
@ -4,7 +4,8 @@ GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
|
||||
GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
|
||||
GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
|
||||
@ -19,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
@ -37,19 +38,26 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
## GPT4ALL
|
||||
gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
|
||||
|
||||
## BERT embeddings
|
||||
go-bert:
|
||||
@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
|
||||
|
||||
## CEREBRAS GPT
|
||||
go-gpt2:
|
||||
@ -119,20 +127,20 @@ go-llama/libbinding.a: go-llama
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
|
||||
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz
|
||||
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
|
||||
$(GOCMD) mod download
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-gpt4all-j clean
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C go-gpt2 clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
@ -140,11 +148,11 @@ rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building
|
||||
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./gpt4all
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
@ -156,7 +164,7 @@ clean: ## Remove build related file
|
||||
build: prepare ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
|
||||
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
|
@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
|
||||
It("returns errors", func() {
|
||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
|
||||
})
|
||||
|
||||
})
|
||||
|
@ -11,8 +11,8 @@ import (
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
@ -315,29 +315,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gptj.GPTJ:
|
||||
case *gpt4all.Model:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(c.Temperature),
|
||||
gptj.SetTopP(c.TopP),
|
||||
gptj.SetTopK(c.TopK),
|
||||
gptj.SetTokens(c.Maxtokens),
|
||||
gptj.SetThreads(c.Threads),
|
||||
predictOptions := []gpt4all.PredictOption{
|
||||
gpt4all.SetTemperature(c.Temperature),
|
||||
gpt4all.SetTopP(c.TopP),
|
||||
gpt4all.SetTopK(c.TopK),
|
||||
gpt4all.SetTokens(c.Maxtokens),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
|
||||
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
case *llama.LLama:
|
||||
supportStreams = true
|
||||
|
1
go.mod
1
go.mod
@ -49,6 +49,7 @@ require (
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.18 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
|
||||
github.com/philhofer/fwd v1.1.2 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
|
@ -15,9 +15,9 @@ import (
|
||||
bloomz "github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@ -26,7 +26,7 @@ type ModelLoader struct {
|
||||
mu sync.Mutex
|
||||
// TODO: this needs generics
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gptmodels map[string]*gpt4all.Model
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
dollymodels map[string]*gpt2.Dolly
|
||||
@ -42,7 +42,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
gpt2models: make(map[string]*gpt2.GPT2),
|
||||
gptmodels: make(map[string]*gptj.GPTJ),
|
||||
gptmodels: make(map[string]*gpt4all.Model),
|
||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
dollymodels: make(map[string]*gpt2.Dolly),
|
||||
redpajama: make(map[string]*gpt2.RedPajama),
|
||||
@ -328,7 +328,7 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
func (ml *ModelLoader) LoadGPT4AllModel(modelName string, opts ...gpt4all.ModelOption) (*gpt4all.Model, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
@ -346,7 +346,7 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gptj.New(modelFile)
|
||||
model, err := gpt4all.New(modelFile, opts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -470,8 +470,12 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
|
||||
return ml.LoadRedPajama(modelFile)
|
||||
case "gpt2":
|
||||
return ml.LoadGPT2Model(modelFile)
|
||||
case "gptj":
|
||||
return ml.LoadGPTJModel(modelFile)
|
||||
case "gpt4all-llama":
|
||||
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
|
||||
case "gpt4all-mpt":
|
||||
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
|
||||
case "gpt4all-j":
|
||||
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
|
||||
case "bert-embeddings":
|
||||
return ml.LoadBERT(modelFile)
|
||||
case "rwkv":
|
||||
@ -514,7 +518,23 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadGPTJModel(modelFile)
|
||||
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
@ -553,14 +573,14 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadBloomz(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
// Do not autoload bloomz
|
||||
//model, modelerr = ml.LoadBloomz(modelFile)
|
||||
//if modelerr == nil {
|
||||
// updateModels(model)
|
||||
// return model, nil
|
||||
//} else {
|
||||
// err = multierror.Append(err, modelerr)
|
||||
//}
|
||||
|
||||
model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
if modelerr == nil {
|
||||
|
Loading…
Reference in New Issue
Block a user