mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat: auto select llama-cpp cpu variant (#2305)
* auto select cpu variant
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
* remove cuda target for now
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
* fix metal
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
* fix path
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
---------
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
This commit is contained in:
parent
b4cb22f444
commit
e2c3ffb09b
42
Makefile
42
Makefile
@ -70,7 +70,7 @@ UNAME_S := $(shell uname -s)
|
||||
endif
|
||||
|
||||
ifeq ($(OS),Darwin)
|
||||
|
||||
|
||||
ifeq ($(OSX_SIGNING_IDENTITY),)
|
||||
OSX_SIGNING_IDENTITY := $(shell security find-identity -v -p codesigning | grep '"' | head -n 1 | sed -E 's/.*"(.*)"/\1/')
|
||||
endif
|
||||
@ -154,8 +154,8 @@ endif
|
||||
|
||||
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
|
||||
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
|
||||
@ -652,30 +652,30 @@ else
|
||||
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-cpp: backend-assets/grpc
|
||||
$(info ${GREEN}I llama-cpp build info:standard${RESET})
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-default
|
||||
$(MAKE) -C backend/cpp/llama-default purge
|
||||
$(MAKE) VARIANT="llama-default" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||
|
||||
backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-noavx
|
||||
$(MAKE) -C backend/cpp/llama-noavx purge
|
||||
$(info ${GREEN}I llama-cpp build info:noavx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx
|
||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||
$(MAKE) -C backend/cpp/llama-avx purge
|
||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||
|
||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
|
||||
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
|
||||
# TODO: every binary should have its own folder instead, so can have different metal implementations
|
||||
ifeq ($(BUILD_TYPE),metal)
|
||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||
endif
|
||||
|
||||
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
|
||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
|
||||
@ -719,7 +719,7 @@ docker:
|
||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||
--build-arg BUILD_TYPE=$(BUILD_TYPE) \
|
||||
-t $(DOCKER_IMAGE) .
|
||||
|
||||
|
||||
docker-aio:
|
||||
@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
|
||||
docker build \
|
||||
|
77
go.mod
77
go.mod
@ -7,8 +7,11 @@ toolchain go1.22.2
|
||||
require (
|
||||
github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf
|
||||
github.com/Masterminds/sprig/v3 v3.2.3
|
||||
github.com/alecthomas/kong v0.9.0
|
||||
github.com/charmbracelet/glamour v0.7.0
|
||||
github.com/chasefleming/elem-go v0.25.0
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df
|
||||
github.com/elliotchance/orderedmap/v2 v2.2.0
|
||||
github.com/fsnotify/fsnotify v1.7.0
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e
|
||||
github.com/go-audio/wav v1.1.0
|
||||
@ -18,10 +21,13 @@ require (
|
||||
github.com/gofiber/swagger v1.0.0
|
||||
github.com/gofiber/template/html/v2 v2.1.1
|
||||
github.com/google/uuid v1.5.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/hpcloud/tail v1.0.0
|
||||
github.com/imdario/mergo v0.3.16
|
||||
github.com/jaypipes/ghw v0.12.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/klauspost/cpuid/v2 v2.2.7
|
||||
github.com/mholt/archiver/v3 v3.5.1
|
||||
github.com/microcosm-cc/bluemonday v1.0.26
|
||||
github.com/mudler/go-processmanager v0.0.0-20230818213616-f204007f963c
|
||||
github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
|
||||
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20231022042237-c25dc5193530
|
||||
@ -35,6 +41,7 @@ require (
|
||||
github.com/russross/blackfriday v1.6.0
|
||||
github.com/sashabaranov/go-openai v1.20.4
|
||||
github.com/schollz/progressbar/v3 v3.13.1
|
||||
github.com/shirou/gopsutil/v3 v3.23.9
|
||||
github.com/stretchr/testify v1.9.0
|
||||
github.com/swaggo/swag v1.16.3
|
||||
github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
|
||||
@ -43,23 +50,13 @@ require (
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.42.0
|
||||
go.opentelemetry.io/otel/metric v1.19.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.19.0
|
||||
golang.org/x/sys v0.19.0
|
||||
google.golang.org/grpc v1.59.0
|
||||
google.golang.org/protobuf v1.33.0
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/shirou/gopsutil/v3 v3.23.9
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.3 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
@ -69,12 +66,12 @@ require (
|
||||
github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/alecthomas/chroma/v2 v2.8.0 // indirect
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
|
||||
github.com/aymerick/douceur v0.2.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/chasefleming/elem-go v0.25.0 // indirect
|
||||
github.com/containerd/continuity v0.3.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dlclark/regexp2 v1.8.1 // indirect
|
||||
@ -84,34 +81,45 @@ require (
|
||||
github.com/docker/go-units v0.4.0 // indirect
|
||||
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-audio/audio v1.0.0 // indirect
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
||||
github.com/go-openapi/spec v0.21.0 // indirect
|
||||
github.com/go-openapi/swag v0.23.0 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/gofiber/contrib/fiberzerolog v1.0.0
|
||||
github.com/gofiber/template v1.8.3 // indirect
|
||||
github.com/gofiber/utils v1.1.0 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/protobuf v1.5.3 // indirect
|
||||
github.com/golang/snappy v0.0.2 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
|
||||
github.com/gorilla/css v1.0.1 // indirect
|
||||
github.com/huandu/xstrings v1.3.3 // indirect
|
||||
github.com/jaypipes/ghw v0.12.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||
github.com/klauspost/compress v1.17.0 // indirect
|
||||
github.com/klauspost/pgzip v1.2.5 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/microcosm-cc/bluemonday v1.0.26 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/mitchellh/copystructure v1.0.0 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.0 // indirect
|
||||
github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect
|
||||
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
|
||||
github.com/muesli/reflow v0.3.0 // indirect
|
||||
github.com/muesli/termenv v0.15.2 // indirect
|
||||
github.com/nwaples/rardecode v1.1.0 // indirect
|
||||
@ -123,53 +131,38 @@ require (
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pkoukk/tiktoken-go v0.1.2 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect
|
||||
github.com/prometheus/common v0.44.0 // indirect
|
||||
github.com/prometheus/procfs v0.11.1 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/shopspring/decimal v1.2.0 // indirect
|
||||
github.com/sirupsen/logrus v1.8.1 // indirect
|
||||
github.com/spf13/cast v1.3.1 // indirect
|
||||
github.com/swaggo/files/v2 v2.0.0 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
github.com/tklauser/numcpus v0.6.1 // indirect
|
||||
github.com/ulikunitz/xz v0.5.9 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
|
||||
github.com/yuin/goldmark v1.5.4 // indirect
|
||||
github.com/yuin/goldmark-emoji v1.0.2 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.3 // indirect
|
||||
go.opentelemetry.io/otel/sdk v1.19.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.19.0 // indirect
|
||||
golang.org/x/crypto v0.22.0 // indirect
|
||||
golang.org/x/mod v0.16.0 // indirect
|
||||
golang.org/x/net v0.24.0 // indirect
|
||||
golang.org/x/term v0.19.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
golang.org/x/tools v0.19.0 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
|
||||
gopkg.in/fsnotify.v1 v1.4.7 // indirect
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/alecthomas/kong v0.9.0
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/go-audio/audio v1.0.0 // indirect
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/gofiber/contrib/fiberzerolog v1.0.0
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/klauspost/compress v1.17.0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||
github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
golang.org/x/net v0.24.0 // indirect
|
||||
golang.org/x/sys v0.19.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
golang.org/x/tools v0.19.0 // indirect
|
||||
)
|
||||
|
18
go.sum
18
go.sum
@ -67,6 +67,8 @@ github.com/donomii/go-rwkv.cpp v0.0.0-20230715075832-c898cd0f62df/go.mod h1:gWy7
|
||||
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
|
||||
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
github.com/elliotchance/orderedmap/v2 v2.2.0 h1:7/2iwO98kYT4XkOjA9mBEIwvi4KpGB4cyHeOFOnj4Vk=
|
||||
github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7zncAdBIBq6u56Hb1PRU5Q=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
|
||||
@ -149,14 +151,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3
|
||||
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
|
||||
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
|
||||
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
|
||||
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
|
||||
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
|
||||
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
@ -299,8 +295,6 @@ github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ
|
||||
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
|
||||
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
|
||||
github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0=
|
||||
github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
|
||||
@ -391,8 +385,6 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
|
||||
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
|
||||
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
|
||||
golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
|
||||
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
@ -411,8 +403,6 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
|
||||
golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
|
||||
golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
|
||||
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
|
||||
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
|
||||
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@ -450,16 +440,12 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
|
||||
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
|
||||
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
|
||||
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
||||
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
|
||||
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
|
||||
golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q=
|
||||
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
|
@ -13,6 +13,9 @@ import (
|
||||
grpc "github.com/go-skynet/LocalAI/pkg/grpc"
|
||||
"github.com/phayes/freeport"
|
||||
"github.com/rs/zerolog/log"
|
||||
"golang.org/x/sys/cpu"
|
||||
|
||||
"github.com/elliotchance/orderedmap/v2"
|
||||
)
|
||||
|
||||
var Aliases map[string]string = map[string]string{
|
||||
@ -24,8 +27,11 @@ var Aliases map[string]string = map[string]string{
|
||||
|
||||
const (
|
||||
LlamaGGML = "llama-ggml"
|
||||
LLamaCPP = "llama-cpp"
|
||||
|
||||
LLamaCPP = "llama-cpp"
|
||||
LLamaCPPCUDA12 = "llama-cpp-cuda12"
|
||||
LLamaCPPAVX2 = "llama-cpp-avx2"
|
||||
LLamaCPPAVX = "llama-cpp-avx"
|
||||
LLamaCPPFallback = "llama-cpp-fallback"
|
||||
|
||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||
@ -50,14 +56,14 @@ func backendPath(assetDir, backend string) string {
|
||||
|
||||
// backendsInAssetDir returns the list of backends in the asset directory
|
||||
// that should be loaded
|
||||
func backendsInAssetDir(assetDir string) ([]string, error) {
|
||||
func backendsInAssetDir(assetDir string) (*orderedmap.OrderedMap[string, any], error) {
|
||||
// Exclude backends from automatic loading
|
||||
excludeBackends := []string{LocalStoreBackend}
|
||||
entry, err := os.ReadDir(backendPath(assetDir, ""))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var backends []string
|
||||
backends := make(map[string][]string)
|
||||
ENTRY:
|
||||
for _, e := range entry {
|
||||
for _, exclude := range excludeBackends {
|
||||
@ -66,7 +72,28 @@ ENTRY:
|
||||
}
|
||||
}
|
||||
if !e.IsDir() {
|
||||
backends = append(backends, e.Name())
|
||||
//backends = append(backends, e.Name())
|
||||
if !strings.Contains(e.Name(), LLamaCPP) {
|
||||
backends[e.Name()] = []string{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback := false, false, false
|
||||
if _, ok := backends[LLamaCPP]; !ok {
|
||||
for _, e := range entry {
|
||||
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX2)
|
||||
foundLCPPAVX2 = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPAVX) && !foundLCPPAVX {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPAVX)
|
||||
foundLCPPAVX = true
|
||||
}
|
||||
if strings.Contains(e.Name(), LLamaCPPFallback) && !foundLCPPFallback {
|
||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
|
||||
foundLCPPFallback = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,37 +104,40 @@ ENTRY:
|
||||
// First has more priority
|
||||
priorityList := []string{
|
||||
// First llama.cpp and llama-ggml
|
||||
LLamaCPP, LLamaCPPFallback, LlamaGGML, Gpt4All,
|
||||
LLamaCPP, LlamaGGML, Gpt4All,
|
||||
}
|
||||
|
||||
toTheEnd := []string{
|
||||
// last has to be huggingface
|
||||
LCHuggingFaceBackend,
|
||||
// then bert embeddings
|
||||
BertEmbeddingsBackend,
|
||||
}
|
||||
slices.Reverse(priorityList)
|
||||
slices.Reverse(toTheEnd)
|
||||
|
||||
// order certain backends first
|
||||
for _, b := range priorityList {
|
||||
for i, be := range backends {
|
||||
if be == b {
|
||||
backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
|
||||
break
|
||||
}
|
||||
// create an ordered map
|
||||
orderedBackends := orderedmap.NewOrderedMap[string, any]()
|
||||
// add priorityList first
|
||||
for _, p := range priorityList {
|
||||
if _, ok := backends[p]; ok {
|
||||
orderedBackends.Set(p, backends[p])
|
||||
}
|
||||
}
|
||||
// make sure that some others are pushed at the end
|
||||
for _, b := range toTheEnd {
|
||||
for i, be := range backends {
|
||||
if be == b {
|
||||
backends = append(append(backends[:i], backends[i+1:]...), be)
|
||||
break
|
||||
|
||||
for k, v := range backends {
|
||||
if !slices.Contains(toTheEnd, k) {
|
||||
if _, ok := orderedBackends.Get(k); !ok {
|
||||
orderedBackends.Set(k, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return backends, nil
|
||||
for _, t := range toTheEnd {
|
||||
if _, ok := backends[t]; ok {
|
||||
orderedBackends.Set(t, backends[t])
|
||||
}
|
||||
}
|
||||
|
||||
return orderedBackends, nil
|
||||
}
|
||||
|
||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||
@ -159,6 +189,21 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
||||
}
|
||||
} else {
|
||||
grpcProcess := backendPath(o.assetDir, backend)
|
||||
|
||||
// for llama-cpp, check CPU capabilities and load the appropriate variant
|
||||
if backend == LLamaCPP {
|
||||
if cpu.X86.HasAVX2 {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
|
||||
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2)
|
||||
} else if cpu.X86.HasAVX {
|
||||
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
|
||||
grpcProcess = backendPath(o.assetDir, LLamaCPPAVX)
|
||||
} else {
|
||||
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
|
||||
grpcProcess = backendPath(o.assetDir, LLamaCPPFallback)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the file exists
|
||||
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
|
||||
@ -301,25 +346,30 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
||||
var err error
|
||||
|
||||
// autoload also external backends
|
||||
allBackendsToAutoLoad := []string{}
|
||||
allBackendsToAutoLoad := orderedmap.NewOrderedMap[string, any]()
|
||||
autoLoadBackends, err := backendsInAssetDir(o.assetDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
|
||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, autoLoadBackends...)
|
||||
|
||||
for _, k := range autoLoadBackends.Keys() {
|
||||
v, _ := autoLoadBackends.Get(k)
|
||||
allBackendsToAutoLoad.Set(k, v)
|
||||
}
|
||||
|
||||
for _, b := range o.externalBackends {
|
||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
|
||||
allBackendsToAutoLoad.Set(b, []string{})
|
||||
}
|
||||
|
||||
if o.model != "" {
|
||||
log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
|
||||
log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.model, allBackendsToAutoLoad.Keys())
|
||||
}
|
||||
|
||||
for _, b := range allBackendsToAutoLoad {
|
||||
log.Info().Msgf("[%s] Attempting to load", b)
|
||||
for _, key := range allBackendsToAutoLoad.Keys() {
|
||||
log.Info().Msgf("[%s] Attempting to load", key)
|
||||
options := []Option{
|
||||
WithBackendString(b),
|
||||
WithBackendString(key),
|
||||
WithModel(o.model),
|
||||
WithLoadGRPCLoadModelOpts(o.gRPCOptions),
|
||||
WithThreads(o.threads),
|
||||
@ -332,14 +382,14 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
||||
|
||||
model, modelerr := ml.BackendLoader(options...)
|
||||
if modelerr == nil && model != nil {
|
||||
log.Info().Msgf("[%s] Loads OK", b)
|
||||
log.Info().Msgf("[%s] Loads OK", key)
|
||||
return model, nil
|
||||
} else if modelerr != nil {
|
||||
err = errors.Join(err, modelerr)
|
||||
log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error())
|
||||
log.Info().Msgf("[%s] Fails: %s", key, modelerr.Error())
|
||||
} else if model == nil {
|
||||
err = errors.Join(err, fmt.Errorf("backend returned no usable model"))
|
||||
log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
|
||||
log.Info().Msgf("[%s] Fails: %s", key, "backend returned no usable model")
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user