diff --git a/Dockerfile b/Dockerfile
index 9d651760..14e037e6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -286,7 +286,14 @@ COPY --from=grpc /opt/grpc /usr/local
 WORKDIR /build
 
 ## Build the binary
-RUN make build
+## If it's CUDA, we want to skip some of the llama-compat backends to save space
+## We only leave the most CPU-optimized variant and the fallback for the cublas build
+## (both will use CUDA for the actual computation)
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
+        SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
+    else \
+        make build; \
+    fi
 
 RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
     mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \
diff --git a/Makefile b/Makefile
index 2ecbaea8..ca8077af 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=3ba780e2a8f0ffe13f571b27f0bbf2ca5a199efc
+CPPLLAMA_VERSION?=e11bd856d538e44d24d8cad4b0381fba0984d162
 
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp