From ae533cadef85c0f046060c332ebbc7cc6c4794cc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 15 Jul 2023 01:19:43 +0200 Subject: [PATCH] feat: move gpt4all to a grpc service Signed-off-by: Ettore Di Giacinto --- .gitignore | 2 +- Makefile | 30 +++------ api/prediction.go | 33 +--------- cmd/grpc/gpt4all/main.go | 23 +++++++ pkg/grpc/llm/gpt4all/gpt4all.go | 61 ++++++++++++++++++ pkg/grpc/proto/llmserver.pb.go | 110 +++++++++++++++++++------------- pkg/grpc/proto/llmserver.proto | 2 + pkg/model/initializers.go | 16 +++-- 8 files changed, 170 insertions(+), 107 deletions(-) create mode 100644 cmd/grpc/gpt4all/main.go create mode 100644 pkg/grpc/llm/gpt4all/gpt4all.go diff --git a/.gitignore b/.gitignore index 8819ad71..a40bf192 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # go-llama build artifacts go-llama -gpt4all +/gpt4all go-stable-diffusion go-piper go-ggllm diff --git a/Makefile b/Makefile index 35141615..df7a16e4 100644 --- a/Makefile +++ b/Makefile @@ -110,24 +110,6 @@ all: help gpt4all: git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1 - # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml.. - @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.m" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + - @find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/llama_gpt4all_/g' {} + - @find ./gpt4all/gpt4all-backend -type f -name "llama_util.h" -execdir mv {} "llama_gpt4all_util.h" \; - @find ./gpt4all -type f -name "*.cmake" -exec sed -i'' -e 's/llama_util/llama_gpt4all_util/g' {} + - @find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_util/llama_gpt4all_util/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.cpp" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.go" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/load_model/load_gpt4all_model/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/set_numa_thread_affinity/gpt4all_set_numa_thread_affinity/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.c" -exec sed -i'' -e 's/set_numa_thread_affinity/gpt4all__set_numa_thread_affinity/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.c" -exec sed -i'' -e 's/clear_numa_thread_affinity/gpt4all__clear_numa_thread_affinity/g' {} + - @find ./gpt4all/gpt4all-bindings/golang -type f -name "*.h" -exec sed -i'' -e 's/clear_numa_thread_affinity/gpt4all__clear_numa_thread_affinity/g' {} + ## go-ggllm go-ggllm: @@ -282,7 +264,7 @@ rebuild: ## Rebuilds the project $(MAKE) -C go-ggllm clean $(MAKE) build -prepare: prepare-sources backend-assets/gpt4all grpcs $(OPTIONAL_TARGETS) go-ggllm/libggllm.a go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building +prepare: prepare-sources grpcs go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a $(OPTIONAL_TARGETS) touch $@ clean: ## Remove build related file @@ -365,12 +347,16 @@ protogen: backend-assets/grpc: mkdir -p backend-assets/grpc -falcon-grpc: backend-assets/grpc +falcon-grpc: backend-assets/grpc go-ggllm/libggllm.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \ $(GOCMD) build -x -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/ -llama-grpc: backend-assets/grpc +llama-grpc: backend-assets/grpc go-llama/libbinding.a CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \ $(GOCMD) build -x -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/ -grpcs: falcon-grpc llama-grpc \ No newline at end of file +gpt4all-grpc: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a + CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \ + $(GOCMD) build -x -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/ + +grpcs: falcon-grpc llama-grpc gpt4all-grpc \ No newline at end of file diff --git a/api/prediction.go b/api/prediction.go index 970f06e6..f24376ca 100644 --- a/api/prediction.go +++ b/api/prediction.go @@ -18,8 +18,6 @@ import ( "github.com/go-skynet/bloomz.cpp" bert "github.com/go-skynet/go-bert.cpp" transformers "github.com/go-skynet/go-ggml-transformers.cpp" - - gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" ) // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784 @@ -43,6 +41,7 @@ func gRPCModelOpts(c Config) *pb.ModelOptions { NGPULayers: int32(c.NGPULayers), MMap: c.MMap, MainGPU: c.MainGPU, + Threads: int32(c.Threads), TensorSplit: c.TensorSplit, } } @@ -492,36 +491,6 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, o *Option, to predictOptions..., ) } - case *gpt4all.Model: - supportStreams = true - - fn = func() (string, error) { - if tokenCallback != nil { - model.SetTokenCallback(tokenCallback) - } - - // Generate the prediction using the language model - predictOptions := []gpt4all.PredictOption{ - gpt4all.SetTemperature(c.Temperature), - gpt4all.SetTopP(c.TopP), - gpt4all.SetTopK(c.TopK), - gpt4all.SetTokens(c.Maxtokens), - } - - if c.Batch != 0 { - predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch)) - } - - str, er := model.Predict( - s, - predictOptions..., - ) - // Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels) - // For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}} - // after a stream event has occurred - model.SetTokenCallback(nil) - return str, er - } case *grpc.Client: // in GRPC, the backend is supposed to answer to 1 single token if stream is not supported supportStreams = true diff --git a/cmd/grpc/gpt4all/main.go b/cmd/grpc/gpt4all/main.go new file mode 100644 index 00000000..a784d401 --- /dev/null +++ b/cmd/grpc/gpt4all/main.go @@ -0,0 +1,23 @@ +package main + +// Note: this is started internally by LocalAI and a server is allocated for each model + +import ( + "flag" + + gpt4all "github.com/go-skynet/LocalAI/pkg/grpc/llm/gpt4all" + + grpc "github.com/go-skynet/LocalAI/pkg/grpc" +) + +var ( + addr = flag.String("addr", "localhost:50051", "the address to connect to") +) + +func main() { + flag.Parse() + + if err := grpc.StartServer(*addr, &gpt4all.LLM{}); err != nil { + panic(err) + } +} diff --git a/pkg/grpc/llm/gpt4all/gpt4all.go b/pkg/grpc/llm/gpt4all/gpt4all.go new file mode 100644 index 00000000..0d7dac58 --- /dev/null +++ b/pkg/grpc/llm/gpt4all/gpt4all.go @@ -0,0 +1,61 @@ +package gpt4all + +// This is a wrapper to statisfy the GRPC service interface +// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) +import ( + "fmt" + + pb "github.com/go-skynet/LocalAI/pkg/grpc/proto" + gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" +) + +type LLM struct { + gpt4all *gpt4all.Model +} + +func (llm *LLM) Load(opts *pb.ModelOptions) error { + model, err := gpt4all.New(opts.Model, + gpt4all.SetThreads(int(opts.Threads)), + gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath)) + llm.gpt4all = model + return err +} + +func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption { + predictOptions := []gpt4all.PredictOption{ + gpt4all.SetTemperature(float64(opts.Temperature)), + gpt4all.SetTopP(float64(opts.TopP)), + gpt4all.SetTopK(int(opts.TopK)), + gpt4all.SetTokens(int(opts.Tokens)), + } + + if opts.Batch != 0 { + predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch))) + } + return predictOptions +} + +func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { + return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...) +} + +func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) { + predictOptions := buildPredictOptions(opts) + + go func() { + llm.gpt4all.SetTokenCallback(func(token string) bool { + results <- token + return true + }) + _, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...) + if err != nil { + fmt.Println("err: ", err) + } + llm.gpt4all.SetTokenCallback(nil) + close(results) + }() +} + +func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) { + return []float32{}, fmt.Errorf("not implemented") +} diff --git a/pkg/grpc/proto/llmserver.pb.go b/pkg/grpc/proto/llmserver.pb.go index d54c3937..d8bdcd22 100644 --- a/pkg/grpc/proto/llmserver.pb.go +++ b/pkg/grpc/proto/llmserver.pb.go @@ -431,20 +431,22 @@ type ModelOptions struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` - ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` - Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` - NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` - F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` - MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` - MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` - VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` - LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` - Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` - NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` - NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` - MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` - TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` + Model string `protobuf:"bytes,1,opt,name=Model,proto3" json:"Model,omitempty"` + ContextSize int32 `protobuf:"varint,2,opt,name=ContextSize,proto3" json:"ContextSize,omitempty"` + Seed int32 `protobuf:"varint,3,opt,name=Seed,proto3" json:"Seed,omitempty"` + NBatch int32 `protobuf:"varint,4,opt,name=NBatch,proto3" json:"NBatch,omitempty"` + F16Memory bool `protobuf:"varint,5,opt,name=F16Memory,proto3" json:"F16Memory,omitempty"` + MLock bool `protobuf:"varint,6,opt,name=MLock,proto3" json:"MLock,omitempty"` + MMap bool `protobuf:"varint,7,opt,name=MMap,proto3" json:"MMap,omitempty"` + VocabOnly bool `protobuf:"varint,8,opt,name=VocabOnly,proto3" json:"VocabOnly,omitempty"` + LowVRAM bool `protobuf:"varint,9,opt,name=LowVRAM,proto3" json:"LowVRAM,omitempty"` + Embeddings bool `protobuf:"varint,10,opt,name=Embeddings,proto3" json:"Embeddings,omitempty"` + NUMA bool `protobuf:"varint,11,opt,name=NUMA,proto3" json:"NUMA,omitempty"` + NGPULayers int32 `protobuf:"varint,12,opt,name=NGPULayers,proto3" json:"NGPULayers,omitempty"` + MainGPU string `protobuf:"bytes,13,opt,name=MainGPU,proto3" json:"MainGPU,omitempty"` + TensorSplit string `protobuf:"bytes,14,opt,name=TensorSplit,proto3" json:"TensorSplit,omitempty"` + Threads int32 `protobuf:"varint,15,opt,name=Threads,proto3" json:"Threads,omitempty"` + LibrarySearchPath string `protobuf:"bytes,16,opt,name=LibrarySearchPath,proto3" json:"LibrarySearchPath,omitempty"` } func (x *ModelOptions) Reset() { @@ -577,6 +579,20 @@ func (x *ModelOptions) GetTensorSplit() string { return "" } +func (x *ModelOptions) GetThreads() int32 { + if x != nil { + return x.Threads + } + return 0 +} + +func (x *ModelOptions) GetLibrarySearchPath() string { + if x != nil { + return x.LibrarySearchPath + } + return "" +} + type Result struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -753,7 +769,7 @@ var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{ 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x24, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x82, 0x03, 0x0a, + 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0xca, 0x03, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, @@ -778,36 +794,40 @@ var file_pkg_grpc_proto_llmserver_proto_rawDesc = []byte{ 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x4d, 0x61, 0x69, 0x6e, 0x47, 0x50, 0x55, 0x12, 0x20, 0x0a, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x54, 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x53, 0x70, 0x6c, 0x69, - 0x74, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, - 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, - 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, - 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, - 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, - 0x67, 0x73, 0x32, 0xfe, 0x01, 0x0a, 0x03, 0x4c, 0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, - 0x61, 0x6c, 0x74, 0x68, 0x12, 0x12, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, - 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, - 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2c, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, - 0x74, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, - 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, - 0x6c, 0x12, 0x11, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0b, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, - 0x72, 0x65, 0x61, 0x6d, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, - 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, - 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x38, 0x0a, 0x09, 0x45, 0x6d, 0x62, - 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, - 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x14, 0x2e, 0x6c, 0x6c, - 0x6d, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x22, 0x00, 0x42, 0x57, 0x0a, 0x1b, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, - 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x50, 0x01, 0x5a, - 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, - 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, - 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x74, 0x12, 0x18, 0x0a, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x0f, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x07, 0x54, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x11, 0x4c, + 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, + 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x4c, 0x69, 0x62, 0x72, 0x61, 0x72, 0x79, 0x53, + 0x65, 0x61, 0x72, 0x63, 0x68, 0x50, 0x61, 0x74, 0x68, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, + 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, + 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, + 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, + 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, + 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x32, 0xfe, 0x01, 0x0a, 0x03, 0x4c, + 0x4c, 0x4d, 0x12, 0x2a, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x12, 0x2e, 0x6c, + 0x6c, 0x6d, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2c, + 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x13, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, + 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0a, + 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x09, + 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x11, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0b, 0x2e, 0x6c, + 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x0d, 0x50, + 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x13, 0x2e, 0x6c, + 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x0a, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, + 0x01, 0x12, 0x38, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x13, + 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x1a, 0x14, 0x2e, 0x6c, 0x6c, 0x6d, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, + 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x57, 0x0a, 0x1b, 0x69, + 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, + 0x2e, 0x6c, 0x6c, 0x6d, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x42, 0x09, 0x4c, 0x4c, 0x4d, 0x53, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, + 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, + 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/pkg/grpc/proto/llmserver.proto b/pkg/grpc/proto/llmserver.proto index b6fa4cd6..32fe0ff7 100644 --- a/pkg/grpc/proto/llmserver.proto +++ b/pkg/grpc/proto/llmserver.proto @@ -76,6 +76,8 @@ message ModelOptions { int32 NGPULayers = 12; string MainGPU = 13; string TensorSplit = 14; + int32 Threads = 15; + string LibrarySearchPath = 16; } message Result { diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index 1acde4c9..3a0c5eaa 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -19,7 +19,6 @@ import ( transformers "github.com/go-skynet/go-ggml-transformers.cpp" "github.com/hashicorp/go-multierror" "github.com/hpcloud/tail" - gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" "github.com/phayes/freeport" "github.com/rs/zerolog/log" @@ -140,11 +139,11 @@ var lcHuggingFace = func(repoId string) (interface{}, error) { // } // } -func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) { - return func(s string) (interface{}, error) { - return gpt4all.New(s, opts...) - } -} +// func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) { +// return func(s string) (interface{}, error) { +// return gpt4all.New(s, opts...) +// } +// } func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) { return func(s string) (interface{}, error) { @@ -287,7 +286,10 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model interface{}, err err case StarcoderBackend: return ml.LoadModel(o.modelFile, starCoder) case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All: - return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all")))) + o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all") + return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o)) + + // return ml.LoadModel(o.modelFile, gpt4allLM(gpt4all.SetThreads(int(o.threads)), gpt4all.SetLibrarySearchPath(filepath.Join(o.assetDir, "backend-assets", "gpt4all")))) case BertEmbeddingsBackend: return ml.LoadModel(o.modelFile, bertEmbeddings) case RwkvBackend: