diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 2452ef04..3e4d8e4d 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -46,7 +46,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 0a30e46f..73899e15 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -64,7 +64,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 53015565..c891d3dd 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "5"
+        default: "4"
         type: string
       platforms:
         description: 'Platforms'
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index efd33f83..86d60921 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-5
+          CUDA_VERSION: 12-4
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
diff --git a/Dockerfile b/Dockerfile
index ac42db5d..907ad54b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=5
+ARG CUDA_MINOR_VERSION=4
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
diff --git a/Makefile b/Makefile
index e2ae5c49..d4ca1e4f 100644
--- a/Makefile
+++ b/Makefile
@@ -425,7 +425,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)