mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat(backends): Drop bert.cpp (#4272)
* feat(backends): Drop bert.cpp use llama.cpp 3.2 as a drop-in replacement for bert.cpp Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(tests): make test more robust Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
1688ba7f2a
commit
3c3050f68e
30
Makefile
30
Makefile
@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=30ec39832165627dd6ed98938df63adfc6e6a21a
|
||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
|
||||
|
||||
# bert.cpp version
|
||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4
|
||||
|
||||
# go-piper version
|
||||
PIPER_REPO?=https://github.com/mudler/go-piper
|
||||
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
|
||||
@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
|
||||
endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
@ -228,19 +223,6 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## BERT embeddings
|
||||
sources/go-bert.cpp:
|
||||
mkdir -p sources/go-bert.cpp
|
||||
cd sources/go-bert.cpp && \
|
||||
git init && \
|
||||
git remote add origin $(BERT_REPO) && \
|
||||
git fetch origin && \
|
||||
git checkout $(BERT_VERSION) && \
|
||||
git submodule update --init --recursive --depth 1 --single-branch
|
||||
|
||||
sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
|
||||
$(MAKE) -C sources/go-bert.cpp libgobert.a
|
||||
|
||||
## go-llama.cpp
|
||||
sources/go-llama.cpp:
|
||||
mkdir -p sources/go-llama.cpp
|
||||
@ -320,12 +302,11 @@ sources/whisper.cpp:
|
||||
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
|
||||
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
|
||||
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
|
||||
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
|
||||
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
|
||||
@ -334,7 +315,6 @@ replace:
|
||||
dropreplace:
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
|
||||
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
|
||||
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
|
||||
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
|
||||
@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C sources/go-llama.cpp clean
|
||||
$(MAKE) -C sources/whisper.cpp clean
|
||||
$(MAKE) -C sources/go-stable-diffusion clean
|
||||
$(MAKE) -C sources/go-bert.cpp clean
|
||||
$(MAKE) -C sources/go-piper clean
|
||||
$(MAKE) -C sources/go-tiny-dream clean
|
||||
$(MAKE) build
|
||||
@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
|
||||
backend-assets/grpc: protogen-go replace
|
||||
mkdir -p backend-assets/grpc
|
||||
|
||||
backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
|
||||
ifneq ($(UPX),)
|
||||
$(UPX) backend-assets/grpc/bert-embeddings
|
||||
endif
|
||||
|
||||
backend-assets/grpc/huggingface: backend-assets/grpc
|
||||
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
|
||||
ifneq ($(UPX),)
|
||||
|
@ -1,7 +1,7 @@
|
||||
name: text-embedding-ada-002
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
|
||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
@ -1,34 +0,0 @@
|
||||
package main
|
||||
|
||||
// This is a wrapper to satisfy the GRPC service interface
|
||||
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
||||
import (
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
|
||||
"github.com/mudler/LocalAI/pkg/grpc/base"
|
||||
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||
)
|
||||
|
||||
type Embeddings struct {
|
||||
base.SingleThread
|
||||
bert *bert.Bert
|
||||
}
|
||||
|
||||
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
|
||||
model, err := bert.New(opts.ModelFile)
|
||||
llm.bert = model
|
||||
return err
|
||||
}
|
||||
|
||||
func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
|
||||
|
||||
if len(opts.EmbeddingTokens) > 0 {
|
||||
tokens := []int{}
|
||||
for _, t := range opts.EmbeddingTokens {
|
||||
tokens = append(tokens, int(t))
|
||||
}
|
||||
return llm.bert.TokenEmbeddings(tokens, bert.SetThreads(int(opts.Threads)))
|
||||
}
|
||||
|
||||
return llm.bert.Embeddings(opts.Embeddings, bert.SetThreads(int(opts.Threads)))
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package main
|
||||
|
||||
// Note: this is started internally by LocalAI and a server is allocated for each model
|
||||
|
||||
import (
|
||||
"flag"
|
||||
|
||||
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||
)
|
||||
|
||||
var (
|
||||
addr = flag.String("addr", "localhost:50051", "the address to connect to")
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if err := grpc.StartServer(*addr, &Embeddings{}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
@ -12,6 +12,8 @@ import (
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
|
||||
|
||||
var _ = Describe("Model test", func() {
|
||||
|
||||
Context("Downloading", func() {
|
||||
@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {
|
||||
|
||||
gallery := []GalleryModel{{
|
||||
Name: "bert",
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
URL: bertEmbeddingsURL,
|
||||
}}
|
||||
out, err := yaml.Marshal(gallery)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models)).To(Equal(1))
|
||||
Expect(models[0].Name).To(Equal("bert"))
|
||||
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
|
||||
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
|
||||
Expect(models[0].Installed).To(BeFalse())
|
||||
|
||||
err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
|
||||
@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
|
||||
content := map[string]interface{}{}
|
||||
err = yaml.Unmarshal(dat, &content)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||
|
||||
models, err = AvailableGalleryModels(galleries, tempdir)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
|
||||
return nil, resp.StatusCode
|
||||
}
|
||||
|
||||
const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`
|
||||
|
||||
//go:embed backend-assets/*
|
||||
var backendAssets embed.FS
|
||||
|
||||
@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
|
||||
g := []gallery.GalleryModel{
|
||||
{
|
||||
Name: "bert",
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
URL: bertEmbeddingsURL,
|
||||
},
|
||||
{
|
||||
Name: "bert2",
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
URL: bertEmbeddingsURL,
|
||||
Overrides: map[string]interface{}{"foo": "bar"},
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
|
||||
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
|
||||
},
|
||||
}
|
||||
out, err := yaml.Marshal(g)
|
||||
@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
|
||||
content := map[string]interface{}{}
|
||||
err = yaml.Unmarshal(dat, &content)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||
Expect(content["foo"]).To(Equal("bar"))
|
||||
|
||||
models, err = getModels("http://127.0.0.1:9090/models/available")
|
||||
@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
|
||||
It("overrides models", func() {
|
||||
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
URL: bertEmbeddingsURL,
|
||||
Name: "bert",
|
||||
Overrides: map[string]interface{}{
|
||||
"backend": "llama",
|
||||
@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
|
||||
})
|
||||
It("apply models without overrides", func() {
|
||||
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
|
||||
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
|
||||
URL: bertEmbeddingsURL,
|
||||
Name: "bert",
|
||||
Overrides: map[string]interface{}{},
|
||||
})
|
||||
@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
|
||||
content := map[string]interface{}{}
|
||||
err = yaml.Unmarshal(dat, &content)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
|
||||
})
|
||||
|
||||
It("runs openllama(llama-ggml backend)", Label("llama"), func() {
|
||||
@ -806,7 +808,7 @@ var _ = Describe("API test", func() {
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
|
||||
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
|
||||
})
|
||||
It("can generate completions via ggml", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
|
||||
@ -866,8 +868,8 @@ var _ = Describe("API test", func() {
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred(), err)
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))
|
||||
|
||||
sunEmbedding := resp.Data[0].Embedding
|
||||
resp2, err := client.CreateEmbeddings(
|
||||
@ -951,7 +953,7 @@ var _ = Describe("API test", func() {
|
||||
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices) > 0).To(BeTrue())
|
||||
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
|
||||
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five"), ContainSubstring("5")))
|
||||
|
||||
stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
@ -27,39 +27,6 @@ embeddings: true
|
||||
# .. other parameters
|
||||
```
|
||||
|
||||
## Bert embeddings
|
||||
|
||||
To use `bert.cpp` models you can use the `bert` embedding backend.
|
||||
|
||||
An example model config file:
|
||||
|
||||
```yaml
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
# .. other parameters
|
||||
```
|
||||
|
||||
The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.
|
||||
|
||||
For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:
|
||||
|
||||
```bash
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
||||
```
|
||||
|
||||
To test locally (LocalAI server running on `localhost`),
|
||||
you can use `curl` (and `jq` at the end to prettify):
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "text-embedding-ada-002"
|
||||
}' | jq "."
|
||||
```
|
||||
|
||||
## Huggingface embeddings
|
||||
|
||||
To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
|
||||
@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
|
||||
|
||||
## Llama.cpp embeddings
|
||||
|
||||
Embeddings with `llama.cpp` are supported with the `llama` backend.
|
||||
Embeddings with `llama.cpp` are supported by the `llama-cpp` backend. Embedding support must be enabled by setting `embeddings` to `true`.
|
||||
|
||||
```yaml
|
||||
name: my-awesome-model
|
||||
backend: llama
|
||||
backend: llama-cpp
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: ggml-file.bin
|
||||
# ...
|
||||
```
|
||||
|
||||
Then you can use the API to generate embeddings:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "My text",
|
||||
"model": "my-awesome-model"
|
||||
}' | jq "."
|
||||
```
|
||||
|
||||
## 💡 Examples
|
||||
|
||||
- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||
|
@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
|
||||
|
||||
```bash
|
||||
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
|
||||
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
|
||||
"id": "bert-embeddings",
|
||||
"name": "text-embedding-ada-002"
|
||||
}'
|
||||
```
|
||||
|
@ -1,23 +0,0 @@
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: bert-cpp-minilm-v6
|
||||
|
||||
parameters:
|
||||
model: bert-MiniLM-L6-v2q4_0.bin
|
||||
|
||||
download_files:
|
||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "bert-cpp-minilm-v6"
|
||||
}'
|
@ -1,12 +0,0 @@
|
||||
---
|
||||
name: "bert-embeddings"
|
||||
|
||||
config_file: |
|
||||
parameters:
|
||||
model: bert-MiniLM-L6-v2q4_0.bin
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
files:
|
||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
@ -380,6 +380,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
|
||||
overrides:
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||
files:
|
||||
@ -8732,16 +8733,13 @@
|
||||
- filename: "ggml-model-whisper-tiny.en-q8_0.bin"
|
||||
uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin"
|
||||
sha256: 5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94
|
||||
## Bert embeddings
|
||||
- url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master"
|
||||
## Bert embeddings (llama3.2 drop-in)
|
||||
- !!merge <<: *llama32
|
||||
name: "bert-embeddings"
|
||||
license: "Apache 2.0"
|
||||
urls:
|
||||
- https://huggingface.co/skeskinen/ggml
|
||||
description: |
|
||||
llama3.2 embeddings model. Using as drop-in replacement for bert-embeddings
|
||||
tags:
|
||||
- embeddings
|
||||
description: |
|
||||
Bert model that can be used for embeddings
|
||||
## Stable Diffusion
|
||||
- url: github:mudler/LocalAI/gallery/stablediffusion.yaml@master
|
||||
license: "BSD-3"
|
||||
|
@ -45,7 +45,6 @@ const (
|
||||
|
||||
LLamaCPPGRPC = "llama-cpp-grpc"
|
||||
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
WhisperBackend = "whisper"
|
||||
StableDiffusionBackend = "stablediffusion"
|
||||
TinyDreamBackend = "tinydream"
|
||||
@ -154,8 +153,6 @@ func orderBackends(backends map[string][]string) ([]string, error) {
|
||||
toTheEnd := []string{
|
||||
// last has to be huggingface
|
||||
LCHuggingFaceBackend,
|
||||
// then bert embeddings
|
||||
BertEmbeddingsBackend,
|
||||
}
|
||||
|
||||
// create an ordered map
|
||||
|
@ -1,5 +1,4 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
Loading…
Reference in New Issue
Block a user