fix(cuda): downgrade to 12.0 to increase compatibility range (#2994)

* fix(cuda): downgrade to 12.0 to increase compatibility range

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* improve messaging

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-17 22:48:08 +00:00 · 2024-07-23 23:35:31 +02:00
parent 1c96e0b79e
commit a9757fb057
6 changed files with 11 additions and 12 deletions
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@ -47,7 +47,7 @@ jobs:
          #   makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@ -120,7 +120,7 @@ jobs:
          #   makeflags: "--jobs=3 --output-sync=target"
          # - build-type: 'cublas'
          #   cuda-major-version: "12"
-          #   cuda-minor-version: "4"
+          #   cuda-minor-version: "0"
          #   platforms: 'linux/amd64'
          #   tag-latest: 'false'
          #   tag-suffix: '-cublas-cuda12-ffmpeg-core'
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@ -75,7 +75,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12'
@ -100,7 +100,7 @@ jobs:
            makeflags: "--jobs=3 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'auto'
            tag-suffix: '-cublas-cuda12-ffmpeg'
@ -285,7 +285,7 @@ jobs:
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-core'
@ -307,7 +307,7 @@ jobs:
            makeflags: "--jobs=4 --output-sync=target"
          - build-type: 'cublas'
            cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg-core'
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -31,7 +31,6 @@ jobs:
        with:
          go-version: '1.21.x'
          cache: false
-
      - name: Dependencies
        run: |
          sudo apt-get update
--- a/Dockerfile
+++ b/Dockerfile
@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=4
+ARG CUDA_MINOR_VERSION=0

 ENV BUILD_TYPE=${BUILD_TYPE}

--- a/Makefile
+++ b/Makefile
@ -480,7 +480,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .

 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@ -212,7 +212,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundCUDA = true
 				} else {
-					log.Info().Msgf("GPU device found but no CUDA backend present")
+					log.Debug().Msgf("Nvidia GPU device found, no embedded CUDA variant found. You can ignore this message if you are using container with CUDA support")
 				}
 			}
 			if strings.Contains(gpu.String(), "amd") {
@ -222,7 +222,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundAMDGPU = true
 				} else {
-					log.Info().Msgf("GPU device found but no HIPBLAS backend present")
+					log.Debug().Msgf("AMD GPU device found, no embedded HIPBLAS variant found. You can ignore this message if you are using container with HIPBLAS support")
 				}
 			}
 			if strings.Contains(gpu.String(), "intel") {
@ -236,7 +236,7 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
 					grpcProcess = p
 					foundIntelGPU = true
 				} else {
-					log.Info().Msgf("GPU device found but no Intel backend present")
+					log.Debug().Msgf("Intel GPU device found, no embedded SYCL variant found. You can ignore this message if you are using container with SYCL support")
 				}
 			}
 		}