feat(llama.cpp): do not specify backends to autoload and add llama.cpp variants (#2232)
* feat(initializer): do not specify backends to autoload.
  We can simply try to autoload the backends extracted in the asset dir.
  This allows building variants of the same backend (e.g. with different
  instruction sets), so that a single binary ships all the variants.
  Signed-off-by: mudler <mudler@localai.io>
* refactor(prepare): refactor out llama.cpp prepare steps.
  Make them idempotent so that we can re-build.
  Signed-off-by: mudler <mudler@localai.io>
* [TEST] feat(build): build noavx version alongside.
  Signed-off-by: mudler <mudler@localai.io>
* build: make build parallel.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* build: do not override CMAKE_ARGS.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* build: add fallback variant.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Fixups.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(huggingface-langchain): fail if no token is set.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix(huggingface-langchain): rename.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: do not autoload local-store.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: give priority between the listed backends.
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: mudler <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
parent fa10302dd2 · commit 530bec9c64
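Before the file-by-file diff, a condensed sketch of the idea the commit message describes: rather than keeping a hard-coded list of backends to autoload, LocalAI can scan the gRPC binaries extracted into the asset directory and try them in turn, so instruction-set variants of llama.cpp are picked up automatically. This is an illustrative, self-contained Go sketch only; `discoverBackends` and `tryLoad` are invented names for the example, and the real implementation is the `backendsInAssetDir`/`GreedyLoader` code in the package model hunks further below.

```go
// Illustrative sketch only — not the LocalAI implementation.
package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

// discoverBackends (hypothetical helper) lists the backend binaries that were
// extracted under <assetDir>/backend-assets/grpc. New variants such as
// llama-cpp-noavx or llama-cpp-fallback show up here with no list to maintain.
func discoverBackends(assetDir string) ([]string, error) {
	entries, err := os.ReadDir(filepath.Join(assetDir, "backend-assets", "grpc"))
	if err != nil {
		return nil, err
	}
	var backends []string
	for _, e := range entries {
		if !e.IsDir() {
			backends = append(backends, e.Name())
		}
	}
	return backends, nil
}

// tryLoad stands in for starting a backend's gRPC process and loading a model.
func tryLoad(backend string) error {
	return fmt.Errorf("%s: not wired up in this sketch", backend)
}

func main() {
	backends, err := discoverBackends("/tmp/localai-assets") // example path
	if err != nil {
		fmt.Println(err)
		return
	}
	var errs error
	for _, b := range backends { // greedy: the first backend that loads wins
		if loadErr := tryLoad(b); loadErr != nil {
			errs = errors.Join(errs, loadErr)
			continue
		}
		fmt.Println("loaded backend:", b)
		return
	}
	fmt.Println("all backends failed:", errs)
}
```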
Makefile (41 changed lines)

@@ -152,9 +152,11 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
@@ -293,6 +295,7 @@ clean: ## Remove build related file
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/cpp/llama clean
+	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true
@@ -311,7 +314,7 @@ build: prepare backend-assets grpcs ## Build the project
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
 build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
 
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -616,8 +619,8 @@ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/go
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
 
-backend-assets/grpc/langchain-huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
+backend-assets/grpc/huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
 
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -629,7 +632,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
 	-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
 	-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
 	-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-backend/cpp/llama/grpc-server:
+build-llama-cpp-grpc-server:
 # Conditionally build grpc for the llama backend to use if needed
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	$(MAKE) -C backend/cpp/grpc build
@@ -638,19 +641,37 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
 	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-	$(MAKE) -C backend/cpp/llama grpc-server
+	$(MAKE) -C backend/cpp/${VARIANT} grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
 
-backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
-	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
+backend-assets/grpc/llama-cpp: backend-assets/grpc
+	$(info ${GREEN}I llama-cpp build info:standard${RESET})
+	cp -rf backend/cpp/llama backend/cpp/llama-default
+	$(MAKE) -C backend/cpp/llama-default purge
+	$(MAKE) VARIANT="llama-default" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+	cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
 
+backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-noavx
+	$(MAKE) -C backend/cpp/llama-noavx purge
+	$(info ${GREEN}I llama-cpp build info:noavx${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx
+
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-fallback
+	$(MAKE) -C backend/cpp/llama-fallback purge
+	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
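The three llama-cpp targets above follow one pattern: copy the backend/cpp/llama tree into a per-variant directory (llama-default, llama-noavx, llama-fallback), purge any previous build there, call build-llama-cpp-grpc-server with VARIANT set and variant-specific CMAKE_ARGS (AVX2/AVX512 disabled for noavx; additionally F16C and FMA disabled for fallback), then copy the resulting grpc-server binary into backend-assets/grpc under the variant name. Since each variant is an ordinary file target, a single one should also be buildable in isolation, e.g. `make backend-assets/grpc/llama-cpp-fallback`.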
backend/cpp/llama/Makefile

@@ -43,31 +43,23 @@ llama.cpp:
 
 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
-	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
-	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
-	## XXX: In some versions of CMake clip wasn't being built before llama.
-	## This is an hack for now, but it should be fixed in the future.
-	cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
-	cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
-	echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
-	cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
-	cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
+	bash prepare.sh
 
 rebuild:
-	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	bash prepare.sh
 	rm -rf grpc-server
 	$(MAKE) grpc-server
 
-clean:
-	rm -rf llama.cpp
+purge:
+	rm -rf llama.cpp/build
+	rm -rf llama.cpp/examples/grpc-server
 	rm -rf grpc-server
 
+clean: purge
+	rm -rf llama.cpp
+
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
+	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	bash -c "source $(ONEAPI_VARS); \
 	cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
backend/cpp/llama/prepare.sh (new file, 20 lines)

@@ -0,0 +1,20 @@
+#!/bin/bash
+
+cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
+cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
+cp -rfv json.hpp llama.cpp/examples/grpc-server/
+cp -rfv utils.hpp llama.cpp/examples/grpc-server/
+
+if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
+    echo "grpc-server already added"
+else
+    echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
+fi
+
+## XXX: In some versions of CMake clip wasn't being built before llama.
+## This is an hack for now, but it should be fixed in the future.
+cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
+cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
+echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
+cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
+cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
backend/go/llm/langchain (package main)

@@ -4,6 +4,7 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"fmt"
+	"os"
 
 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
@@ -18,9 +19,14 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
+	var err error
+	hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
+	if hfToken == "" {
+		return fmt.Errorf("no huggingface token provided")
+	}
+	llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
 	llm.model = opts.Model
-	return nil
+	return err
 }
 
 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
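Operational note on the hunk above: the renamed huggingface backend now refuses to load unless the HUGGINGFACEHUB_API_TOKEN environment variable is set, returning "no huggingface token provided" otherwise; the token is then handed to the langchaingo HuggingFace client changed in the package langchain hunks below.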
API test suite (Ginkgo)

@@ -787,11 +787,11 @@ var _ = Describe("API test", func() {
 		})
 
 		It("returns errors", func() {
-			backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
 		})
 
 		It("transcribes audio", func() {
 			if runtime.GOOS != "linux" {
 				Skip("test supported only on linux")
HuggingFace client (package langchain)

@@ -2,6 +2,7 @@ package langchain
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/tmc/langchaingo/llms"
 	"github.com/tmc/langchaingo/llms/huggingface"
@@ -9,11 +10,16 @@ import (
 
 type HuggingFace struct {
 	modelPath string
+	token string
 }
 
-func NewHuggingFace(repoId string) (*HuggingFace, error) {
+func NewHuggingFace(repoId, token string) (*HuggingFace, error) {
+	if token == "" {
+		return nil, fmt.Errorf("no huggingface token provided")
+	}
 	return &HuggingFace{
 		modelPath: repoId,
+		token: token,
 	}, nil
 }
 
@@ -21,7 +27,7 @@ func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*P
 	po := NewPredictOptions(opts...)
 
 	// Init client
-	llm, err := huggingface.New()
+	llm, err := huggingface.New(huggingface.WithToken(s.token))
 	if err != nil {
 		return nil, err
 	}
Model loader (package model)

@@ -2,27 +2,32 @@ package model
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
+	"slices"
 	"strings"
 	"time"
 
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/hashicorp/go-multierror"
 	"github.com/phayes/freeport"
 	"github.com/rs/zerolog/log"
 )
 
 var Aliases map[string]string = map[string]string{
 	"go-llama": LLamaCPP,
 	"llama": LLamaCPP,
 	"embedded-store": LocalStoreBackend,
+	"langchain-huggingface": LCHuggingFaceBackend,
 }
 
 const (
 	LlamaGGML = "llama-ggml"
 	LLamaCPP = "llama-cpp"
+
+	LLamaCPPFallback = "llama-cpp-fallback"
+
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend = "gpt4all-mpt"
 	Gpt4AllJBackend = "gpt4all-j"
@@ -34,21 +39,73 @@ const (
 	StableDiffusionBackend = "stablediffusion"
 	TinyDreamBackend = "tinydream"
 	PiperBackend = "piper"
-	LCHuggingFaceBackend = "langchain-huggingface"
+	LCHuggingFaceBackend = "huggingface"
 
 	LocalStoreBackend = "local-store"
 )
 
-var AutoLoadBackends []string = []string{
-	LLamaCPP,
-	LlamaGGML,
-	Gpt4All,
-	BertEmbeddingsBackend,
-	RwkvBackend,
-	WhisperBackend,
-	StableDiffusionBackend,
-	TinyDreamBackend,
-	PiperBackend,
+func backendPath(assetDir, backend string) string {
+	return filepath.Join(assetDir, "backend-assets", "grpc", backend)
+}
+
+func backendsInAssetDir(assetDir string) ([]string, error) {
+	excludeBackends := []string{"local-store"}
+	entry, err := os.ReadDir(backendPath(assetDir, ""))
+	if err != nil {
+		return nil, err
+	}
+	var backends []string
+ENTRY:
+	for _, e := range entry {
+		for _, exclude := range excludeBackends {
+			if e.Name() == exclude {
+				continue ENTRY
+			}
+		}
+		if !e.IsDir() {
+			backends = append(backends, e.Name())
+		}
+	}
+
+	// order backends from the asset directory.
+	// as we scan for backends, we want to keep some order which backends are tried of.
+	// for example, llama.cpp should be tried first, and we want to keep the huggingface backend at the last.
+
+	// sets a priority list
+	// First has more priority
+	priorityList := []string{
+		// First llama.cpp and llama-ggml
+		LLamaCPP, LLamaCPPFallback, LlamaGGML, Gpt4All,
+	}
+	toTheEnd := []string{
+		// last has to be huggingface
+		LCHuggingFaceBackend,
+		// then bert embeddings
+		BertEmbeddingsBackend,
+	}
+	slices.Reverse(priorityList)
+	slices.Reverse(toTheEnd)
+
+	// order certain backends first
+	for _, b := range priorityList {
+		for i, be := range backends {
+			if be == b {
+				backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
+				break
+			}
+		}
+	}
+	// make sure that some others are pushed at the end
+	for _, b := range toTheEnd {
+		for i, be := range backends {
+			if be == b {
+				backends = append(append(backends[:i], backends[i+1:]...), be)
+				break
+			}
+		}
+	}
+
+	return backends, nil
 }
 
 // starts the grpcModelProcess for the backend, and returns a grpc client
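The slice manipulation in backendsInAssetDir is compact and easy to misread, so here is a standalone illustration of what the two reordering passes do. It is only a sketch with hard-coded backend names; in LocalAI the input comes from scanning backend-assets/grpc.

```go
// Demonstrates the priority reordering used by backendsInAssetDir above.
package main

import (
	"fmt"
	"slices"
)

func main() {
	// Pretend these binaries were discovered in arbitrary order.
	backends := []string{
		"bert-embeddings", "huggingface", "llama-ggml", "whisper",
		"llama-cpp-fallback", "llama-cpp", "gpt4all", "piper",
	}

	priorityList := []string{"llama-cpp", "llama-cpp-fallback", "llama-ggml", "gpt4all"}
	toTheEnd := []string{"huggingface", "bert-embeddings"}
	// Reversed so that the first entry is moved last, i.e. ends up first (or last) overall.
	slices.Reverse(priorityList)
	slices.Reverse(toTheEnd)

	// Move priority backends to the front.
	for _, b := range priorityList {
		for i, be := range backends {
			if be == b {
				backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
				break
			}
		}
	}
	// Push the "last resort" backends to the end.
	for _, b := range toTheEnd {
		for i, be := range backends {
			if be == b {
				backends = append(append(backends[:i], backends[i+1:]...), be)
				break
			}
		}
	}

	fmt.Println(backends)
	// [llama-cpp llama-cpp-fallback llama-ggml gpt4all whisper piper bert-embeddings huggingface]
}
```

The net effect is that llama.cpp and its fallback variants are tried first, while huggingface and bert-embeddings are only tried after everything else has failed.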
@@ -99,7 +156,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 			client = ModelAddress(uri)
 		}
 	} else {
-		grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
+		grpcProcess := backendPath(o.assetDir, backend)
 		// Check if the file exists
 		if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
 			return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
@@ -243,7 +300,12 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 
 	// autoload also external backends
 	allBackendsToAutoLoad := []string{}
-	allBackendsToAutoLoad = append(allBackendsToAutoLoad, AutoLoadBackends...)
+	autoLoadBackends, err := backendsInAssetDir(o.assetDir)
+	if err != nil {
+		return nil, err
+	}
+	log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
+	allBackendsToAutoLoad = append(allBackendsToAutoLoad, autoLoadBackends...)
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
@@ -271,10 +333,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 			log.Info().Msgf("[%s] Loads OK", b)
 			return model, nil
 		} else if modelerr != nil {
-			err = multierror.Append(err, modelerr)
+			err = errors.Join(err, modelerr)
 			log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error())
 		} else if model == nil {
-			err = multierror.Append(err, fmt.Errorf("backend returned no usable model"))
+			err = errors.Join(err, fmt.Errorf("backend returned no usable model"))
 			log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
 		}
 	}
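Finally, the loader drops github.com/hashicorp/go-multierror in favour of the standard library's errors.Join (Go 1.20+). A minimal, self-contained illustration of the aggregation behaviour GreedyLoader now relies on (the error messages here are invented for the example):

```go
package main

import (
	"errors"
	"fmt"
)

func main() {
	var err error               // stays nil until a backend actually fails
	err = errors.Join(err, nil) // joining only nil values is a no-op, err is still nil
	err = errors.Join(err, errors.New("llama-cpp: failed to load model"))
	err = errors.Join(err, errors.New("gpt4all: failed to load model"))

	fmt.Println(err)
	// Output:
	// llama-cpp: failed to load model
	// gpt4all: failed to load model
}
```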