feat(amdgpu): try to build in single binary (#2485)

* feat(amdgpu): try to build in single binary Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Release space from worker Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2025-06-18 23:18:10 +00:00 · 2024-06-05 08:44:15 +02:00
parent fab3e711ff
commit 17cf6c4a4d
3 changed files with 92 additions and 2 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -18,6 +18,46 @@ jobs:
  build-linux:
    runs-on: ubuntu-latest
    steps:
      - name: Release space from worker
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get remove -y microsoft-edge-stable || true
          sudo apt-get remove -y firefox || true
          sudo apt-get remove -y powershell || true
          sudo apt-get remove -y r-base-core || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /opt/ghc || true
          sudo rm -rf "/usr/local/share/boost" || true
          sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
          df -h
      - name: Clone
        uses: actions/checkout@v4
        with:
@ -38,6 +78,31 @@ jobs:
          sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
        env:
          CUDA_VERSION: 12-3
      - name: "Install Hipblas"
        env:
          ROCM_VERSION: "6.1"
          AMDGPU_VERSION: "6.1"
        run: |
            set -ex 
            sudo apt-get update
            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg 
            curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - 
            printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
            printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
            printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
            sudo apt-get update
            sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
                hipblas-dev rocm-dev \
                rocblas-dev
            sudo apt-get clean
            sudo rm -rf /var/lib/apt/lists/*
            sudo ldconfig 
      - name: Cache grpc
        id: cache-grpc
        uses: actions/cache@v4
@ -61,6 +126,7 @@ jobs:
          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
          export PATH=$PATH:$GOPATH/bin
          export PATH=/usr/local/cuda/bin:$PATH
          export PATH=/opt/rocm/bin:$PATH
          GO_TAGS=p2p make dist
      - uses: actions/upload-artifact@v4
        with:
--- a/8
+++ b/8
@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
 	$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
 else
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
 	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
 endif
 	$(MAKE) build
 	mkdir -p release
@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
 	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
 backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-hipblas
 	$(MAKE) -C backend/cpp/llama-hipblas purge
 	$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
 	BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
 	cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
 backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
 	cp -rf backend/cpp/llama backend/cpp/llama-grpc
 	$(MAKE) -C backend/cpp/llama-grpc purge
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@ -37,6 +37,7 @@ const (
 	LLamaCPPAVX      = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA     = "llama-cpp-cuda"
 	LLamaCPPHipblas  = "llama-cpp-hipblas"
 	LLamaCPPGRPC     = "llama-cpp-grpc"
 	Gpt4AllLlamaBackend = "gpt4all-llama"
@ -93,7 +94,7 @@ ENTRY:
 	if autoDetect {
 		// if we find the llama.cpp variants, show them of as a single backend (llama-cpp) as later we are going to pick that up
 		// when starting the service
-		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
+		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
 		if _, ok := backends[LLamaCPP]; !ok {
 			for _, e := range entry {
 				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@ -116,6 +117,10 @@ ENTRY:
 					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
 					foundLCPPCuda = true
 				}
 				if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
 					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
 					foundLCPPHipblas = true
 				}
 			}
 		}
 	}
@ -169,6 +174,7 @@ ENTRY:
 // selectGRPCProcess selects the GRPC process to start based on system capabilities
 func selectGRPCProcess(backend, assetDir string) string {
 	foundCUDA := false
 	foundAMDGPU := false
 	var grpcProcess string
 	// Select backend now just for llama.cpp
@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
 					log.Info().Msgf("GPU device found but no CUDA backend present")
 				}
 			}
 			if strings.Contains(gpu.String(), "amd") {
 				p := backendPath(assetDir, LLamaCPPHipblas)
 				if _, err := os.Stat(p); err == nil {
 					log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
 					grpcProcess = p
 					foundAMDGPU = true
 				} else {
 					log.Info().Msgf("GPU device found but no HIPBLAS backend present")
 				}
 			}
 		}
 	}
-	if foundCUDA {
+	if foundCUDA || foundAMDGPU {
 		return grpcProcess
 	}