diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 3c0f4202..5e876f15 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -12,8 +12,8 @@ jobs:
           - repository: "go-skynet/go-llama.cpp"
            variable: "GOLLAMA_VERSION"
            branch: "master"
-          - repository: "go-skynet/go-gpt2.cpp"
-            variable: "GOGPT2_VERSION"
+          - repository: "go-skynet/go-ggml-transformers.cpp"
+            variable: "GOGGMLTRANSFORMERS_VERSION"
            branch: "master"
           - repository: "donomii/go-rwkv.cpp"
            variable: "RWKV_VERSION"
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 7444304d..ea49088b 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -2,6 +2,9 @@ name: Build and Release
 
 on: push
 
+permissions:
+  contents: write
+
 jobs:
   build-linux:
     strategy:
diff --git a/Makefile b/Makefile
index 4a9ae10b..03d0c851 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b
-GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
+GOGGMLTRANSFORMERS_VERSION?=14fd6c9
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
 WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd
@@ -29,8 +29,8 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)
 
-C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
-LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
 
 ifeq ($(BUILD_TYPE),openblas)
 	CGO_LDFLAGS+=-lopenblas
@@ -117,23 +117,23 @@ gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
 ## CEREBRAS GPT
-go-gpt2:
-	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
-	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
+go-ggml-transformers:
+	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
+	cd go-ggml-transformers && git checkout -b build $(GOGGMLTRANSFORMERS_VERSION) && git submodule update --init --recursive --depth 1
 # This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
-	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
-	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
-	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
-	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
-	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
+	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
 
-go-gpt2/libgpt2.a: go-gpt2
-	$(MAKE) -C go-gpt2 libgpt2.a
+go-ggml-transformers/libtransformers.a: go-ggml-transformers
+	$(MAKE) -C go-ggml-transformers libtransformers.a
 
 whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git
@@ -155,21 +155,21 @@ go-llama/libbinding.a: go-llama
 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
 	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
 
-prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
+prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
 	$(GOCMD) mod download
 
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(MAKE) -C go-llama clean
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
-	$(MAKE) -C go-gpt2 clean
+	$(MAKE) -C go-ggml-transformers clean
 	$(MAKE) -C go-rwkv clean
 	$(MAKE) -C whisper.cpp clean
 	$(MAKE) -C go-stable-diffusion clean
@@ -177,13 +177,13 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C bloomz clean
 	$(MAKE) build
 
-prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
+prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
 
 clean: ## Remove build related file
 	rm -fr ./go-llama
 	rm -rf ./gpt4all
 	rm -rf ./go-stable-diffusion
-	rm -rf ./go-gpt2
+	rm -rf ./go-ggml-transformers
 	rm -rf ./go-rwkv
 	rm -rf ./go-bert
 	rm -rf ./bloomz
@@ -213,7 +213,7 @@ run: prepare ## run local-ai
 test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
+	wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
 	wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
 	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
diff --git a/README.md b/README.md
index dee37a12..7b9287e1 100644
--- a/README.md
+++ b/README.md
@@ -129,13 +129,13 @@ Depending on the model you are attempting to run might need more RAM or CPU reso
 | [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | no | no | yes |
 | [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | no | no | yes |
 | [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | no | no | yes |
-| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no |
-| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Dolly | yes | no | no | no |
-| [redpajama](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | RedPajama | yes | no | no | no |
-| [stableLM](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | StableLM GPT/NeoX | yes | no | no | no |
-| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Replit | yes | no | no | no |
-| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT NeoX | yes | no | no | no |
-| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Starcoder | yes | no | no | no |
+| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no |
+| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | no | no | no |
+| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | no | no | no |
+| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | no | no | no |
+| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | no | no | no |
+| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | no | no | no |
+| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | no | no | no |
 | [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | no | no | no |
 | [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rw)) | rwkv | yes | no | no | yes |
 | [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp) | bert | no | no | yes | no |
@@ -1045,7 +1045,7 @@ MIT
 
 - [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
-- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
+- [go-skynet/go-ggml-transformers.cpp](https://github.com/go-skynet/go-ggml-transformers.cpp)
 - [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
 - [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
 
diff --git a/api/prediction.go b/api/prediction.go
index c279e08d..08a01e06 100644
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -11,7 +11,7 @@ import (
 	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 	"github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
-	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
 )
@@ -243,23 +243,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 
 			return response, nil
 		}
-	case *gpt2.GPTNeoX:
+	case *transformers.GPTNeoX:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -267,23 +267,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.Replit:
+	case *transformers.Replit:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -291,23 +291,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.Starcoder:
+	case *transformers.Starcoder:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -315,23 +315,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.RedPajama:
+	case *transformers.MPT:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -359,23 +359,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.StableLM:
+	case *transformers.GPTJ:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -383,23 +383,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.Dolly:
+	case *transformers.Dolly:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
@@ -407,23 +407,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 				predictOptions...,
 			)
 		}
-	case *gpt2.GPT2:
+	case *transformers.GPT2:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
-			predictOptions := []gpt2.PredictOption{
-				gpt2.SetTemperature(c.Temperature),
-				gpt2.SetTopP(c.TopP),
-				gpt2.SetTopK(c.TopK),
-				gpt2.SetTokens(c.Maxtokens),
-				gpt2.SetThreads(c.Threads),
+			predictOptions := []transformers.PredictOption{
+				transformers.SetTemperature(c.Temperature),
+				transformers.SetTopP(c.TopP),
+				transformers.SetTopK(c.TopK),
+				transformers.SetTokens(c.Maxtokens),
+				transformers.SetThreads(c.Threads),
 			}
 
 			if c.Batch != 0 {
-				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+				predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 			}
 
 			if c.Seed != 0 {
-				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+				predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 			}
 
 			return model.Predict(
diff --git a/go.mod b/go.mod
index 9b65583e..fe5afd43 100644
--- a/go.mod
+++ b/go.mod
@@ -8,7 +8,7 @@ require (
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
-	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
+	github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523150735-8bfcb3ea6127
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278
 	github.com/gofiber/fiber/v2 v2.46.0
 	github.com/google/uuid v1.3.0
@@ -41,6 +41,7 @@ require (
 	github.com/go-openapi/jsonreference v0.19.6 // indirect
 	github.com/go-openapi/spec v0.20.4 // indirect
 	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 // indirect
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
diff --git a/go.sum b/go.sum
index e09fba34..78cc4d1c 100644
--- a/go.sum
+++ b/go.sum
@@ -16,12 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080 h1:W3itqKpRX9FhheKiAxdmuOBy/mjDfMf2G1vcuFIYqZc=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -42,21 +36,11 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
 github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
 github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
-github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
 github.com/gofiber/fiber/v2 v2.46.0 h1:wkkWotblsGVlLjXj2dpgKQAYHtXumsK/HyFugQM68Ns=
 github.com/gofiber/fiber/v2 v2.46.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
@@ -95,20 +79,8 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9
 github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
-github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253 h1:7udNpoHYOBktcpCEe8aDaPJ0LyzyRhVjpzAGFjPxPkY=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230521011615-aba1147a2253/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92 h1:brOLJSsTLnFK2vUVi7MaVdxAEhHkOsoboR0vR5WW1HU=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522155256-c8c95ab46f92/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2 h1:3368tGU1ooRSPw0zMvXqv9wLMxS82LzEkVSuo8DWZBI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522200803-5ca8767c81a2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
 github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
 github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
 github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index b5e43a38..dc593a7c 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -10,7 +10,7 @@ import (
 	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 	bloomz "github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
-	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/hashicorp/go-multierror"
 	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
@@ -23,9 +23,9 @@ const (
 	LlamaBackend = "llama"
 	BloomzBackend = "bloomz"
 	StarcoderBackend = "starcoder"
-	StableLMBackend = "stablelm"
+	GPTJBackend = "gptj"
 	DollyBackend = "dolly"
-	RedPajamaBackend = "redpajama"
+	MPTBackend = "mpt"
 	GPTNeoXBackend = "gptneox"
 	ReplitBackend = "replit"
 	Gpt2Backend = "gpt2"
@@ -43,41 +43,41 @@ var backends []string = []string{
 	Gpt4AllLlamaBackend,
 	Gpt4AllMptBackend,
 	Gpt4AllJBackend,
-	Gpt2Backend,
-	WhisperBackend,
 	RwkvBackend,
-	BloomzBackend,
-	StableLMBackend,
-	DollyBackend,
-	RedPajamaBackend,
-	ReplitBackend,
 	GPTNeoXBackend,
+	WhisperBackend,
 	BertEmbeddingsBackend,
+	GPTJBackend,
+	Gpt2Backend,
+	DollyBackend,
+	MPTBackend,
+	ReplitBackend,
 	StarcoderBackend,
+	BloomzBackend,
 }
 
 var starCoder = func(modelFile string) (interface{}, error) {
-	return gpt2.NewStarcoder(modelFile)
+	return transformers.NewStarcoder(modelFile)
 }
 
-var redPajama = func(modelFile string) (interface{}, error) {
-	return gpt2.NewRedPajama(modelFile)
+var mpt = func(modelFile string) (interface{}, error) {
+	return transformers.NewMPT(modelFile)
 }
 
 var dolly = func(modelFile string) (interface{}, error) {
-	return gpt2.NewDolly(modelFile)
+	return transformers.NewDolly(modelFile)
 }
 
 var gptNeoX = func(modelFile string) (interface{}, error) {
-	return gpt2.NewGPTNeoX(modelFile)
+	return transformers.NewGPTNeoX(modelFile)
 }
 
 var replit = func(modelFile string) (interface{}, error) {
-	return gpt2.NewReplit(modelFile)
+	return transformers.NewReplit(modelFile)
 }
 
-var stableLM = func(modelFile string) (interface{}, error) {
-	return gpt2.NewStableLM(modelFile)
+var gptJ = func(modelFile string) (interface{}, error) {
+	return transformers.NewGPTJ(modelFile)
 }
 
 var bertEmbeddings = func(modelFile string) (interface{}, error) {
@@ -87,8 +87,9 @@ var bertEmbeddings = func(modelFile string) (interface{}, error) {
 var bloomzLM = func(modelFile string) (interface{}, error) {
 	return bloomz.New(modelFile)
 }
-var gpt2LM = func(modelFile string) (interface{}, error) {
-	return gpt2.New(modelFile)
+
+var transformersLM = func(modelFile string) (interface{}, error) {
+	return transformers.New(modelFile)
 }
 
 var stableDiffusion = func(assetDir string) (interface{}, error) {
@@ -130,14 +131,14 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 		return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
 	case BloomzBackend:
 		return ml.LoadModel(modelFile, bloomzLM)
-	case StableLMBackend:
-		return ml.LoadModel(modelFile, stableLM)
+	case GPTJBackend:
+		return ml.LoadModel(modelFile, gptJ)
 	case DollyBackend:
 		return ml.LoadModel(modelFile, dolly)
-	case RedPajamaBackend:
-		return ml.LoadModel(modelFile, redPajama)
+	case MPTBackend:
+		return ml.LoadModel(modelFile, mpt)
 	case Gpt2Backend:
-		return ml.LoadModel(modelFile, gpt2LM)
+		return ml.LoadModel(modelFile, transformersLM)
 	case GPTNeoXBackend:
 		return ml.LoadModel(modelFile, gptNeoX)
 	case ReplitBackend:
diff --git a/tests/models_fixtures/embeddings.yaml b/tests/models_fixtures/embeddings.yaml
index b90ca75a..46a08502 100644
--- a/tests/models_fixtures/embeddings.yaml
+++ b/tests/models_fixtures/embeddings.yaml
@@ -1,6 +1,5 @@
 name: text-embedding-ada-002
 parameters:
   model: bert
-threads: 14
 backend: bert-embeddings
 embeddings: true
diff --git a/tests/models_fixtures/rwkv.yaml b/tests/models_fixtures/rwkv.yaml
index d78f5cf9..3b47fa0a 100644
--- a/tests/models_fixtures/rwkv.yaml
+++ b/tests/models_fixtures/rwkv.yaml
@@ -6,7 +6,6 @@ parameters:
   max_tokens: 100
   top_p: 0.8
 context_size: 1024
-threads: 14
 backend: "rwkv"
 cutwords:
 - "Bob:.*"