mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
chore(cuda): reduce binary size (#3379)
fix(cuda): reduce binary size Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
7f06954425
commit
5d892f86ea
@@ -286,7 +286,14 @@ COPY --from=grpc /opt/grpc /usr/local
|
||||
WORKDIR /build
|
||||
|
||||
## Build the binary
|
||||
RUN make build
|
||||
## If it's CUDA, we want to skip some of the llama-compat backends to save space
|
||||
## We only leave the most CPU-optimized variant and the fallback for the cublas build
|
||||
## (both will use CUDA for the actual computation)
|
||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||
else \
|
||||
make build; \
|
||||
fi
|
||||
|
||||
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
|
||||
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
|
||||
|
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
||||
# llama.cpp versions
|
||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||
CPPLLAMA_VERSION?=3ba780e2a8f0ffe13f571b27f0bbf2ca5a199efc
|
||||
CPPLLAMA_VERSION?=e11bd856d538e44d24d8cad4b0381fba0984d162
|
||||
|
||||
# go-rwkv version
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
|
Loading…
Reference in New Issue
Block a user