diff --git a/Makefile b/Makefile
index 476caac6..bcbdbe83 100644
--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,6 @@ GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
 CPPLLAMA_VERSION?=1e6f6554aa11fa10160a5fda689e736c3c34169f
 
-# gpt4all version
-GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
-GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
-
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
@@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
 ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
-ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
 ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
 ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
@@ -253,18 +248,6 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
 
-## GPT4ALL
-sources/gpt4all:
-	mkdir -p sources/gpt4all
-	cd sources/gpt4all && \
-	git init && \
-	git remote add origin $(GPT4ALL_REPO) && \
-	git fetch origin && \
-	git checkout $(GPT4ALL_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
-	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## RWKV
 sources/go-rwkv.cpp:
@@ -318,7 +301,7 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -328,7 +311,6 @@ replace:
 	$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
-	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 
 dropreplace:
@@ -339,7 +321,6 @@ dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
-	$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
 
 prepare-sources: get-sources replace
@@ -349,7 +330,6 @@ prepare-sources: get-sources replace
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C sources/go-llama.cpp clean
-	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C sources/go-rwkv.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
@@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs
 	export GO_TAGS="tts stablediffusion debug"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
-	$(MAKE) test-gpt4all
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
 	$(MAKE) test-llama
 	$(MAKE) test-llama-gguf
 	$(MAKE) test-tts
@@ -500,10 +479,6 @@ teardown-e2e:
 	rm -rf $(TEST_DIR) || true
 	docker stop $$(docker ps -q --filter ancestor=localai-tests)
 
-test-gpt4all: prepare-test
-	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
-	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
-
 test-llama: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
@@ -730,12 +705,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_binding.a
 	mkdir -p backend-assets/espeak-ng-data
 	@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
 
-backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
-	mkdir -p backend-assets/gpt4all
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
-	@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
-
 backend-assets/grpc: protogen-go replace
 	mkdir -p backend-assets/grpc
@@ -746,13 +715,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/bert-embeddings
 endif
 
-backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/gpt4all
-endif
-
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
 ifneq ($(UPX),)
diff --git a/backend/go/llm/gpt4all/gpt4all.go b/backend/go/llm/gpt4all/gpt4all.go
deleted file mode 100644
index 9caab48c..00000000
--- a/backend/go/llm/gpt4all/gpt4all.go
+++ /dev/null
@@ -1,62 +0,0 @@
-package main
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/mudler/LocalAI/pkg/grpc/base"
-	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
-	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
-)
-
-type LLM struct {
-	base.SingleThread
-
-	gpt4all *gpt4all.Model
-}
-
-func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	model, err := gpt4all.New(opts.ModelFile,
-		gpt4all.SetThreads(int(opts.Threads)),
-		gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
-	llm.gpt4all = model
-	return err
-}
-
-func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
-	predictOptions := []gpt4all.PredictOption{
-		gpt4all.SetTemperature(float64(opts.Temperature)),
-		gpt4all.SetTopP(float64(opts.TopP)),
-		gpt4all.SetTopK(int(opts.TopK)),
-		gpt4all.SetTokens(int(opts.Tokens)),
-	}
-
-	if opts.Batch != 0 {
-		predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
-	}
-	return predictOptions
-}
-
-func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	predictOptions := buildPredictOptions(opts)
-
-	go func() {
-		llm.gpt4all.SetTokenCallback(func(token string) bool {
-			results <- token
-			return true
-		})
-		_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		llm.gpt4all.SetTokenCallback(nil)
-		close(results)
-	}()
-
-	return nil
-}
diff --git a/backend/go/llm/gpt4all/main.go b/backend/go/llm/gpt4all/main.go
deleted file mode 100644
index acf44087..00000000
--- a/backend/go/llm/gpt4all/main.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package main
-
-// Note: this is started internally by LocalAI and a server is allocated for each model
-
-import (
-	"flag"
-
-	grpc "github.com/mudler/LocalAI/pkg/grpc"
-)
-
-var (
-	addr = flag.String("addr", "localhost:50051", "the address to connect to")
-)
-
-func main() {
-	flag.Parse()
-
-	if err := grpc.StartServer(*addr, &LLM{}); err != nil {
-		panic(err)
-	}
-}
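Note: the two files deleted above are the entire surface of a Go-native backend in this tree: a struct embedding base.SingleThread that implements Load, Predict and PredictStream, plus a main that hands it to grpc.StartServer. The remaining Go backends keep this shape. Below is a minimal sketch of the pattern, with a hypothetical EchoLLM that just returns the prompt; it assumes base.SingleThread continues to provide default stubs for the rest of the gRPC interface, as it did for the deleted wrapper.

package main

import (
	"flag"

	grpc "github.com/mudler/LocalAI/pkg/grpc"
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
)

// EchoLLM is a hypothetical stand-in for a real model binding.
type EchoLLM struct {
	base.SingleThread
}

// Load would normally open opts.ModelFile via the native binding.
func (llm *EchoLLM) Load(opts *pb.ModelOptions) error {
	return nil
}

// Predict returns the prompt unchanged; a real backend calls the model here.
func (llm *EchoLLM) Predict(opts *pb.PredictOptions) (string, error) {
	return opts.Prompt, nil
}

// PredictStream pushes tokens into results and closes the channel when done,
// mirroring the contract the deleted gpt4all wrapper implemented.
func (llm *EchoLLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
	go func() {
		results <- opts.Prompt
		close(results)
	}()
	return nil
}

func main() {
	addr := flag.String("addr", "localhost:50051", "the address to listen on")
	flag.Parse()

	if err := grpc.StartServer(*addr, &EchoLLM{}); err != nil {
		panic(err)
	}
}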
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 5598a485..2baf51ec 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -21,7 +21,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
 	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
 	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 	}
 
 	if len(os.Args) < 4 {
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index ddb3518c..93a365cb 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -33,7 +33,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error {
 	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
 	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
 	if err != nil {
-		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 	}
 
 	// Check if the token is set
diff --git a/core/http/app_test.go b/core/http/app_test.go
index b21ad25a..a837e20c 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -563,32 +563,6 @@ var _ = Describe("API test", func() {
 			Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
 			Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 		})
-
-		It("runs gpt4all", Label("gpt4all"), func() {
-			if runtime.GOOS != "linux" {
-				Skip("test supported only on linux")
-			}
-
-			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
-				URL:  "github:go-skynet/model-gallery/gpt4all-j.yaml",
-				Name: "gpt4all-j",
-			})
-
-			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
-
-			uuid := response["uuid"].(string)
-
-			Eventually(func() bool {
-				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
-				return response["processed"].(bool)
-			}, "960s", "10s").Should(Equal(true))
-
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
-		})
-
 	})
 })
@@ -792,20 +766,6 @@ var _ = Describe("API test", func() {
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 		})
 
-		It("can generate completions from model configs", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
-		})
-
-		It("can generate chat completions from model configs", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
-			Expect(err).ToNot(HaveOccurred())
-			Expect(len(resp.Choices)).To(Equal(1))
-			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
-		})
-
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 			Expect(err).To(HaveOccurred())
diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index 4f8afd3c..2996e9dc 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -267,7 +267,7 @@ func RegisterUIRoutes(app *fiber.App,
 				return c.SendString(elements.ProgressBar("100"))
 			}
 			if status.Error != nil {
-				// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable
+				// TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user
 				processingModels.DeleteUUID(jobUID)
 				return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName))
 			}
diff --git a/core/startup/startup.go b/core/startup/startup.go
index 55f930a4..3565d196 100644
--- a/core/startup/startup.go
+++ b/core/startup/startup.go
@@ -106,7 +106,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
 		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
 		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
 		if err != nil {
-			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
+			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
 		}
 	}
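Note: the three identical warn-message tweaks above all sit next to assets.ExtractFiles, which unpacks the backend assets embedded in the binary to a directory on disk. A rough sketch of what such an extractor does, assuming an embed.FS source; this is hypothetical illustration, not LocalAI's actual implementation.

package assets

import (
	"embed"
	"io/fs"
	"os"
	"path/filepath"
)

// extractFiles walks an embedded filesystem and mirrors it under dst,
// creating directories as needed and writing each file's bytes out.
// Hypothetical sketch of an assets.ExtractFiles-style helper.
func extractFiles(fsys embed.FS, dst string) error {
	return fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		target := filepath.Join(dst, path)
		if d.IsDir() {
			return os.MkdirAll(target, 0o750)
		}
		data, err := fsys.ReadFile(path)
		if err != nil {
			return err
		}
		return os.WriteFile(target, data, 0o600)
	})
}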
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 88a08f28..11980f03 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -45,11 +45,6 @@ const (
 	LLamaCPPGRPC = "llama-cpp-grpc"
 
-	Gpt4AllLlamaBackend = "gpt4all-llama"
-	Gpt4AllMptBackend   = "gpt4all-mpt"
-	Gpt4AllJBackend     = "gpt4all-j"
-	Gpt4All             = "gpt4all"
-
 	BertEmbeddingsBackend = "bert-embeddings"
 	RwkvBackend           = "rwkv"
 	WhisperBackend        = "whisper"
@@ -144,11 +139,10 @@ ENTRY:
 
 	// sets a priority list - first has more priority
 	priorityList := []string{
-
 		// First llama.cpp(variants) and llama-ggml to follow.
 		// We keep the fallback to prevent that if the llama.cpp variants
 		// that depends on shared libs if breaks have still a safety net.
-		LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback,
+		LLamaCPP, LlamaGGML, LLamaCPPFallback,
 	}
 
 	toTheEnd := []string{
@@ -434,9 +428,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
 	var backendToConsume string
 
 	switch backend {
-	case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
-		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
-		backendToConsume = Gpt4All
 	case PiperBackend:
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
 		backendToConsume = PiperBackend
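Note: after this change, backend auto-detection tries LLamaCPP first, then LlamaGGML, then LLamaCPPFallback; gpt4all models are no longer attempted. The idea behind priorityList is simply "first backend that loads wins". A sketch under that assumption; firstWorkingBackend and tryLoad are hypothetical, not the loader's real API.

package main

import (
	"errors"
	"fmt"
)

// firstWorkingBackend walks the candidates in priority order and returns the
// first one that loads; the joined error reports every failed attempt.
func firstWorkingBackend(priorityList []string, tryLoad func(string) error) (string, error) {
	var errs []error
	for _, backend := range priorityList {
		err := tryLoad(backend)
		if err == nil {
			return backend, nil
		}
		errs = append(errs, fmt.Errorf("%s: %w", backend, err))
	}
	return "", errors.Join(errs...)
}

func main() {
	backend, err := firstWorkingBackend(
		[]string{"llama-cpp", "llama-ggml", "llama-cpp-fallback"},
		func(name string) error {
			if name == "llama-cpp-fallback" {
				return nil // pretend only the fallback loads
			}
			return errors.New("shared library failed to load")
		},
	)
	fmt.Println(backend, err)
}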