mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat(backends): Drop bert.cpp (#4272)
* feat(backends): Drop bert.cpp
  use llama.cpp 3.2 as a drop-in replacement for bert.cpp
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* chore(tests): make test more robust
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
1688ba7f2a
commit
3c3050f68e
30
Makefile
30
Makefile
@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=30ec39832165627dd6ed98938df63adfc6e6a21a
|
|||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||||
|
|
||||||
# bert.cpp version
|
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
|
||||||
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
|
||||||
|
|
||||||
# go-piper version
|
# go-piper version
|
||||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||||
@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||||
@ -228,19 +223,6 @@ endif
|
|||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
## BERT embeddings
|
|
||||||
sources/go-bert.cpp:
|
|
||||||
mkdir -p sources/go-bert.cpp
|
|
||||||
cd sources/go-bert.cpp && \
|
|
||||||
git init && \
|
|
||||||
git remote add origin $(BERT_REPO) && \
|
|
||||||
git fetch origin && \
|
|
||||||
git checkout $(BERT_VERSION) && \
|
|
||||||
git submodule update --init --recursive --depth 1 --single-branch
|
|
||||||
|
|
||||||
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
|
||||||
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
|
||||||
|
|
||||||
## go-llama.cpp
|
## go-llama.cpp
|
||||||
sources/go-llama.cpp:
|
sources/go-llama.cpp:
|
||||||
mkdir -p sources/go-llama.cpp
|
mkdir -p sources/go-llama.cpp
|
||||||
@ -320,12 +302,11 @@ sources/whisper.cpp:
|
|||||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||||
|
|
||||||
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||||
|
|
||||||
replace:
|
replace:
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
|
||||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||||
@ -334,7 +315,6 @@ replace:
|
|||||||
dropreplace:
|
dropreplace:
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
|
||||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||||
@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
|
|||||||
$(MAKE) -C sources/go-llama.cpp clean
|
$(MAKE) -C sources/go-llama.cpp clean
|
||||||
$(MAKE) -C sources/whisper.cpp clean
|
$(MAKE) -C sources/whisper.cpp clean
|
||||||
$(MAKE) -C sources/go-stable-diffusion clean
|
$(MAKE) -C sources/go-stable-diffusion clean
|
||||||
$(MAKE) -C sources/go-bert.cpp clean
|
|
||||||
$(MAKE) -C sources/go-piper clean
|
$(MAKE) -C sources/go-piper clean
|
||||||
$(MAKE) -C sources/go-tiny-dream clean
|
$(MAKE) -C sources/go-tiny-dream clean
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
|||||||
backend-assets/grpc: protogen-go replace
|
backend-assets/grpc: protogen-go replace
|
||||||
mkdir -p backend-assets/grpc
|
mkdir -p backend-assets/grpc
|
||||||
|
|
||||||
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
|
||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
|
||||||
ifneq ($(UPX),)
|
|
||||||
$(UPX) backend-assets/grpc/bert-embeddings
|
|
||||||
endif
|
|
||||||
|
|
||||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||||
ifneq ($(UPX),)
|
ifneq ($(UPX),)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
backend: bert-embeddings
|
embeddings: true
|
||||||
parameters:
|
parameters:
|
||||||
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
|
||||||
usage: |
|
usage: |
|
||||||
You can test this model with curl like this:
|
You can test this model with curl like this:
|
||||||
|
@ -1,34 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// This is a wrapper to satisfy the GRPC service interface
|
|
||||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
||||||
import (
|
|
||||||
bert "github.com/go-skynet/go-bert.cpp"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
||||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Embeddings struct {
|
|
||||||
base.SingleThread
|
|
||||||
bert *bert.Bert
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
|
||||||
model, err := bert.New(opts.ModelFile)
|
|
||||||
llm.bert = model
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
|
||||||
|
|
||||||
if len(opts.EmbeddingTokens) > 0 {
|
|
||||||
tokens := []int{}
|
|
||||||
for _, t := range opts.EmbeddingTokens {
|
|
||||||
tokens = append(tokens, int(t))
|
|
||||||
}
|
|
||||||
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
|
|
||||||
}
|
|
||||||
|
|
||||||
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
|
|
||||||
}
|
|
@ -1,21 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"flag"
|
|
||||||
|
|
||||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
}
|
|
@ -12,6 +12,8 @@ import (
|
|||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
|
||||||
|
|
||||||
var _ = Describe("Model test", func() {
|
var _ = Describe("Model test", func() {
|
||||||
|
|
||||||
Context("Downloading", func() {
|
Context("Downloading", func() {
|
||||||
@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {
|
|||||||
|
|
||||||
gallery := []GalleryModel{{
|
gallery := []GalleryModel{{
|
||||||
Name: "bert",
|
Name: "bert",
|
||||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
URL: bertEmbeddingsURL,
|
||||||
}}
|
}}
|
||||||
out, err := yaml.Marshal(gallery)
|
out, err := yaml.Marshal(gallery)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
|
|||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(models)).To(Equal(1))
|
Expect(len(models)).To(Equal(1))
|
||||||
Expect(models[0].Name).To(Equal("bert"))
|
Expect(models[0].Name).To(Equal("bert"))
|
||||||
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
|
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
|
||||||
Expect(models[0].Installed).To(BeFalse())
|
Expect(models[0].Installed).To(BeFalse())
|
||||||
|
|
||||||
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
|
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
|
||||||
@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
|
|||||||
content := map[string]interface{}{}
|
content := map[string]interface{}{}
|
||||||
err = yaml.Unmarshal(dat, &content)
|
err = yaml.Unmarshal(dat, &content)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||||
|
|
||||||
models, err = AvailableGalleryModels(galleries, tempdir)
|
models, err = AvailableGalleryModels(galleries, tempdir)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
|
|||||||
return nil, resp.StatusCode
|
return nil, resp.StatusCode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
|
||||||
|
|
||||||
//go:embed backend-assets/*
|
//go:embed backend-assets/*
|
||||||
var backendAssets embed.FS
|
var backendAssets embed.FS
|
||||||
|
|
||||||
@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
|
|||||||
g := []gallery.GalleryModel{
|
g := []gallery.GalleryModel{
|
||||||
{
|
{
|
||||||
Name: "bert",
|
Name: "bert",
|
||||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
URL: bertEmbeddingsURL,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "bert2",
|
Name: "bert2",
|
||||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
URL: bertEmbeddingsURL,
|
||||||
Overrides: map[string]interface{}{"foo": "bar"},
|
Overrides: map[string]interface{}{"foo": "bar"},
|
||||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
|
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
out, err := yaml.Marshal(g)
|
out, err := yaml.Marshal(g)
|
||||||
@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
|
|||||||
content := map[string]interface{}{}
|
content := map[string]interface{}{}
|
||||||
err = yaml.Unmarshal(dat, &content)
|
err = yaml.Unmarshal(dat, &content)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||||
Expect(content["foo"]).To(Equal("bar"))
|
Expect(content["foo"]).To(Equal("bar"))
|
||||||
|
|
||||||
models, err = getModels("http://127.0.0.1:9090/models/available")
|
models, err = getModels("http://127.0.0.1:9090/models/available")
|
||||||
@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
|
|||||||
It("overrides models", func() {
|
It("overrides models", func() {
|
||||||
|
|
||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
URL: bertEmbeddingsURL,
|
||||||
Name: "bert",
|
Name: "bert",
|
||||||
Overrides: map[string]interface{}{
|
Overrides: map[string]interface{}{
|
||||||
"backend": "llama",
|
"backend": "llama",
|
||||||
@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
|
|||||||
})
|
})
|
||||||
It("apply models without overrides", func() {
|
It("apply models without overrides", func() {
|
||||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
URL: bertEmbeddingsURL,
|
||||||
Name: "bert",
|
Name: "bert",
|
||||||
Overrides: map[string]interface{}{},
|
Overrides: map[string]interface{}{},
|
||||||
})
|
})
|
||||||
@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
|
|||||||
content := map[string]interface{}{}
|
content := map[string]interface{}{}
|
||||||
err = yaml.Unmarshal(dat, &content)
|
err = yaml.Unmarshal(dat, &content)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||||
@ -806,7 +808,7 @@ var _ = Describe("API test", func() {
|
|||||||
It("returns the models list", func() {
|
It("returns the models list", func() {
|
||||||
models, err := client.ListModels(context.TODO())
|
models, err := client.ListModels(context.TODO())
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
|
||||||
})
|
})
|
||||||
It("can generate completions via ggml", func() {
|
It("can generate completions via ggml", func() {
|
||||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
|
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
|
||||||
@ -866,8 +868,8 @@ var _ = Describe("API test", func() {
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
Expect(err).ToNot(HaveOccurred(), err)
|
Expect(err).ToNot(HaveOccurred(), err)
|
||||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
|
||||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
|
||||||
|
|
||||||
sunEmbedding := resp.Data[0].Embedding
|
sunEmbedding := resp.Data[0].Embedding
|
||||||
resp2, err := client.CreateEmbeddings(
|
resp2, err := client.CreateEmbeddings(
|
||||||
@ -951,7 +953,7 @@ var _ = Describe("API test", func() {
|
|||||||
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(len(resp.Choices) > 0).To(BeTrue())
|
Expect(len(resp.Choices) > 0).To(BeTrue())
|
||||||
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
|
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five"), ContainSubstring("5")))
|
||||||
|
|
||||||
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
@ -27,39 +27,6 @@ embeddings: true
|
|||||||
# .. other parameters
|
# .. other parameters
|
||||||
```
|
```
|
||||||
|
|
||||||
## Bert embeddings
|
|
||||||
|
|
||||||
To use `bert.cpp` models you can use the `bert` embedding backend.
|
|
||||||
|
|
||||||
An example model config file:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
name: text-embedding-ada-002
|
|
||||||
parameters:
|
|
||||||
model: bert
|
|
||||||
backend: bert-embeddings
|
|
||||||
embeddings: true
|
|
||||||
# .. other parameters
|
|
||||||
```
|
|
||||||
|
|
||||||
The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.
|
|
||||||
|
|
||||||
For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
|
||||||
```
|
|
||||||
|
|
||||||
To test locally (LocalAI server running on `localhost`),
|
|
||||||
you can use `curl` (and `jq` at the end to prettify):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
|
||||||
"input": "Your text string goes here",
|
|
||||||
"model": "text-embedding-ada-002"
|
|
||||||
}' | jq "."
|
|
||||||
```
|
|
||||||
|
|
||||||
## Huggingface embeddings
|
## Huggingface embeddings
|
||||||
|
|
||||||
To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
|
To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
|
||||||
@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
|
|||||||
|
|
||||||
## Llama.cpp embeddings
|
## Llama.cpp embeddings
|
||||||
|
|
||||||
Embeddings with `llama.cpp` are supported with the `llama` backend.
|
Embeddings with `llama.cpp` are supported with the `llama-cpp` backend; they must be enabled by setting `embeddings` to `true`.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
name: my-awesome-model
|
name: my-awesome-model
|
||||||
backend: llama
|
backend: llama-cpp
|
||||||
embeddings: true
|
embeddings: true
|
||||||
parameters:
|
parameters:
|
||||||
model: ggml-file.bin
|
model: ggml-file.bin
|
||||||
# ...
|
# ...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Then you can use the API to generate embeddings:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||||
|
"input": "My text",
|
||||||
|
"model": "my-awesome-model"
|
||||||
|
}' | jq "."
|
||||||
|
```
|
||||||
|
|
||||||
## 💡 Examples
|
## 💡 Examples
|
||||||
|
|
||||||
- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||||
|
@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
|
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
|
||||||
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
|
"id": "bert-embeddings",
|
||||||
"name": "text-embedding-ada-002"
|
"name": "text-embedding-ada-002"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -1,23 +0,0 @@
|
|||||||
backend: bert-embeddings
|
|
||||||
embeddings: true
|
|
||||||
f16: true
|
|
||||||
|
|
||||||
gpu_layers: 90
|
|
||||||
mmap: true
|
|
||||||
name: bert-cpp-minilm-v6
|
|
||||||
|
|
||||||
parameters:
|
|
||||||
model: bert-MiniLM-L6-v2q4_0.bin
|
|
||||||
|
|
||||||
download_files:
|
|
||||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
|
||||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
|
||||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
|
||||||
|
|
||||||
usage: |
|
|
||||||
You can test this model with curl like this:
|
|
||||||
|
|
||||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
|
||||||
"input": "Your text string goes here",
|
|
||||||
"model": "bert-cpp-minilm-v6"
|
|
||||||
}'
|
|
@ -1,12 +0,0 @@
|
|||||||
---
|
|
||||||
name: "bert-embeddings"
|
|
||||||
|
|
||||||
config_file: |
|
|
||||||
parameters:
|
|
||||||
model: bert-MiniLM-L6-v2q4_0.bin
|
|
||||||
backend: bert-embeddings
|
|
||||||
embeddings: true
|
|
||||||
files:
|
|
||||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
|
||||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
|
||||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
|
@ -380,6 +380,7 @@
|
|||||||
urls:
|
urls:
|
||||||
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
|
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
|
||||||
overrides:
|
overrides:
|
||||||
|
embeddings: true
|
||||||
parameters:
|
parameters:
|
||||||
model: llama-3.2-1b-instruct-q4_k_m.gguf
|
model: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
files:
|
files:
|
||||||
@ -8732,16 +8733,13 @@
|
|||||||
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
|
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
|
||||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
|
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
|
||||||
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
|
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
|
||||||
## Bert embeddings
|
## Bert embeddings (llama3.2 drop-in)
|
||||||
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
|
- !!merge <<: *llama32
|
||||||
name: "bert-embeddings"
|
name: "bert-embeddings"
|
||||||
license: "Apache 2.0"
|
description: |
|
||||||
urls:
|
llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
|
||||||
- https://huggingface.co/skeskinen/ggml
|
|
||||||
tags:
|
tags:
|
||||||
- embeddings
|
- embeddings
|
||||||
description: |
|
|
||||||
Bert model that can be used for embeddings
|
|
||||||
## Stable Diffusion
|
## Stable Diffusion
|
||||||
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
|
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
|
||||||
license: "BSD-3"
|
license: "BSD-3"
|
||||||
|
@ -45,7 +45,6 @@ const (
|
|||||||
|
|
||||||
LLamaCPPGRPC = "llama-cpp-grpc"
|
LLamaCPPGRPC = "llama-cpp-grpc"
|
||||||
|
|
||||||
BertEmbeddingsBackend = "bert-embeddings"
|
|
||||||
WhisperBackend = "whisper"
|
WhisperBackend = "whisper"
|
||||||
StableDiffusionBackend = "stablediffusion"
|
StableDiffusionBackend = "stablediffusion"
|
||||||
TinyDreamBackend = "tinydream"
|
TinyDreamBackend = "tinydream"
|
||||||
@ -154,8 +153,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
|
|||||||
toTheEnd := []string{
|
toTheEnd := []string{
|
||||||
// last has to be huggingface
|
// last has to be huggingface
|
||||||
LCHuggingFaceBackend,
|
LCHuggingFaceBackend,
|
||||||
// then bert embeddings
|
|
||||||
BertEmbeddingsBackend,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// create an ordered map
|
// create an ordered map
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
name: text-embedding-ada-002
|
name: text-embedding-ada-002
|
||||||
parameters:
|
|
||||||
model: bert
|
|
||||||
backend: bert-embeddings
|
|
||||||
embeddings: true
|
embeddings: true
|
||||||
|
parameters:
|
||||||
|
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
Loading…
Reference in New Issue
Block a user