From 43f0688a95ce5a5f43228ae288020bef02770e8e Mon Sep 17 00:00:00 2001 From: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> Date: Wed, 19 Jun 2024 17:50:49 +0200 Subject: [PATCH] feat: Upgrade to CUDA 12.5 (#2601) Signed-off-by: Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> --- .github/workflows/image-pr.yml | 6 +++--- .github/workflows/image.yml | 18 +++++++++--------- .github/workflows/image_build.yml | 6 +++--- .github/workflows/release.yaml | 22 +++++++++++----------- Dockerfile | 8 ++++---- Makefile | 13 ++++++++++++- 6 files changed, 42 insertions(+), 31 deletions(-) diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index f2cda005..230681a2 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -46,7 +46,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg' @@ -119,7 +119,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg-core' @@ -127,4 +127,4 @@ jobs: image-type: 'core' runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" \ No newline at end of file + makeflags: "--jobs=4 --output-sync=target" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 15b2693c..4ae28970 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -64,7 +64,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" - cuda-minor-version: "7" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda11' @@ -75,7 +75,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12' @@ -86,7 +86,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" - cuda-minor-version: "7" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-cublas-cuda11-ffmpeg' @@ -100,7 +100,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-cublas-cuda12-ffmpeg' @@ -232,7 +232,7 @@ jobs: grpc-base-image: "ubuntu:22.04" runs-on: 'arc-runner-set' makeflags: "--jobs=3 --output-sync=target" - + core-image-build: uses: ./.github/workflows/image_build.yml with: @@ -273,7 +273,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" - cuda-minor-version: "7" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda11-core' @@ -284,7 +284,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-core' @@ -295,7 +295,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "11" - cuda-minor-version: "7" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda11-ffmpeg-core' @@ -306,7 +306,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "1" + cuda-minor-version: "5" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-cublas-cuda12-ffmpeg-core' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 96cd5992..53015565 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -19,11 +19,11 @@ on: type: string cuda-major-version: description: 'CUDA major version' - default: "11" + default: "12" type: string cuda-minor-version: description: 'CUDA minor version' - default: "7" + default: "5" type: string platforms: description: 'Platforms' @@ -324,7 +324,7 @@ jobs: docker pull quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }} - + - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 536f4e84..8c1eea2c 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -40,7 +40,7 @@ jobs: sudo apt-get update sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION} env: - CUDA_VERSION: 12-4 + CUDA_VERSION: 12-5 - name: Cache grpc id: cache-grpc uses: actions/cache@v4 @@ -167,15 +167,15 @@ jobs: ROCM_VERSION: "6.1" AMDGPU_VERSION: "6.1" run: | - set -ex + set -ex sudo apt-get update - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg - - curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - - + sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg + + curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - + printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list - + printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 sudo apt-get update @@ -183,10 +183,10 @@ jobs: sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \ hipblas-dev rocm-dev \ rocblas-dev - + sudo apt-get clean sudo rm -rf /var/lib/apt/lists/* - sudo ldconfig + sudo ldconfig - name: Cache grpc id: cache-grpc uses: actions/cache@v4 @@ -291,7 +291,7 @@ jobs: export C_INCLUDE_PATH=/usr/local/include export CPLUS_INCLUDE_PATH=/usr/local/include export PATH=$PATH:$GOPATH/bin - + BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist - uses: actions/upload-artifact@v4 with: @@ -309,4 +309,4 @@ jobs: with: detached: true connect-timeout-seconds: 180 - limit-access-to-actor: true \ No newline at end of file + limit-access-to-actor: true diff --git a/Dockerfile b/Dockerfile index 2b30dccd..61efc00d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,7 +33,7 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta ENV PATH $PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers -RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \ +RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ @@ -98,8 +98,8 @@ RUN pip install --user grpcio-tools FROM requirements-${IMAGE_TYPE} AS requirements-drivers ARG BUILD_TYPE -ARG CUDA_MAJOR_VERSION=11 -ARG CUDA_MINOR_VERSION=8 +ARG CUDA_MAJOR_VERSION=12 +ARG CUDA_MINOR_VERSION=5 ENV BUILD_TYPE=${BUILD_TYPE} @@ -292,7 +292,7 @@ ENV REBUILD=false ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz ENV MAKEFLAGS=${MAKEFLAGS} -ARG CUDA_MAJOR_VERSION=11 +ARG CUDA_MAJOR_VERSION=12 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all diff --git a/Makefile b/Makefile index 63e04ece..4ccda6e4 100644 --- a/Makefile +++ b/Makefile @@ -400,7 +400,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests . + docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) @@ -810,6 +810,17 @@ docker: --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . +docker-cuda11: + docker build \ + --build-arg CUDA_MAJOR_VERSION=11 \ + --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ + --build-arg GO_TAGS="$(GO_TAGS)" \ + --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ + --build-arg BUILD_TYPE=$(BUILD_TYPE) \ + -t $(DOCKER_IMAGE)-cuda11 . + docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" docker build \