mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat(amdgpu): try to build in single binary (#2485)
* feat(amdgpu): try to build in single binary Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Release space from worker Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
fab3e711ff
commit
17cf6c4a4d
66
.github/workflows/release.yaml
vendored
66
.github/workflows/release.yaml
vendored
@ -18,6 +18,46 @@ jobs:
|
|||||||
build-linux:
|
build-linux:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
- name: Release space from worker
|
||||||
|
run: |
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
df -h
|
||||||
|
echo
|
||||||
|
sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
|
||||||
|
sudo apt-get remove --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo apt-get purge --auto-remove android-sdk-platform-tools || true
|
||||||
|
sudo rm -rf /usr/local/lib/android
|
||||||
|
sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
|
||||||
|
sudo rm -rf /usr/share/dotnet
|
||||||
|
sudo apt-get remove -y '^mono-.*' || true
|
||||||
|
sudo apt-get remove -y '^ghc-.*' || true
|
||||||
|
sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
|
||||||
|
sudo apt-get remove -y 'php.*' || true
|
||||||
|
sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
|
||||||
|
sudo apt-get remove -y '^google-.*' || true
|
||||||
|
sudo apt-get remove -y azure-cli || true
|
||||||
|
sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
|
||||||
|
sudo apt-get remove -y '^gfortran-.*' || true
|
||||||
|
sudo apt-get remove -y microsoft-edge-stable || true
|
||||||
|
sudo apt-get remove -y firefox || true
|
||||||
|
sudo apt-get remove -y powershell || true
|
||||||
|
sudo apt-get remove -y r-base-core || true
|
||||||
|
sudo apt-get autoremove -y
|
||||||
|
sudo apt-get clean
|
||||||
|
echo
|
||||||
|
echo "Listing top largest packages"
|
||||||
|
pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
|
||||||
|
head -n 30 <<< "${pkgs}"
|
||||||
|
echo
|
||||||
|
sudo rm -rfv build || true
|
||||||
|
sudo rm -rf /usr/share/dotnet || true
|
||||||
|
sudo rm -rf /opt/ghc || true
|
||||||
|
sudo rm -rf "/usr/local/share/boost" || true
|
||||||
|
sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
|
||||||
|
df -h
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
@ -38,6 +78,31 @@ jobs:
|
|||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-3
|
CUDA_VERSION: 12-3
|
||||||
|
- name: "Install Hipblas"
|
||||||
|
env:
|
||||||
|
ROCM_VERSION: "6.1"
|
||||||
|
AMDGPU_VERSION: "6.1"
|
||||||
|
run: |
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
sudo apt-get update
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
|
||||||
|
|
||||||
|
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
|
||||||
|
|
||||||
|
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
|
||||||
|
|
||||||
|
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
|
||||||
|
printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||||
|
sudo apt-get update
|
||||||
|
|
||||||
|
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||||
|
hipblas-dev rocm-dev \
|
||||||
|
rocblas-dev
|
||||||
|
|
||||||
|
sudo apt-get clean
|
||||||
|
sudo rm -rf /var/lib/apt/lists/*
|
||||||
|
sudo ldconfig
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@ -61,6 +126,7 @@ jobs:
|
|||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
|
export PATH=/opt/rocm/bin:$PATH
|
||||||
GO_TAGS=p2p make dist
|
GO_TAGS=p2p make dist
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
8
Makefile
8
Makefile
@ -327,6 +327,7 @@ ifeq ($(OS),Darwin)
|
|||||||
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
|
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
|
||||||
else
|
else
|
||||||
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
$(MAKE) backend-assets/grpc/llama-cpp-cuda
|
||||||
|
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
|
||||||
endif
|
endif
|
||||||
$(MAKE) build
|
$(MAKE) build
|
||||||
mkdir -p release
|
mkdir -p release
|
||||||
@ -712,6 +713,13 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
|||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||||
|
|
||||||
|
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
||||||
|
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||||
|
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||||
|
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||||
|
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||||
|
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||||
|
@ -37,6 +37,7 @@ const (
|
|||||||
LLamaCPPAVX = "llama-cpp-avx"
|
LLamaCPPAVX = "llama-cpp-avx"
|
||||||
LLamaCPPFallback = "llama-cpp-fallback"
|
LLamaCPPFallback = "llama-cpp-fallback"
|
||||||
LLamaCPPCUDA = "llama-cpp-cuda"
|
LLamaCPPCUDA = "llama-cpp-cuda"
|
||||||
|
LLamaCPPHipblas = "llama-cpp-hipblas"
|
||||||
LLamaCPPGRPC = "llama-cpp-grpc"
|
LLamaCPPGRPC = "llama-cpp-grpc"
|
||||||
|
|
||||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||||
@ -93,7 +94,7 @@ ENTRY:
|
|||||||
if autoDetect {
|
if autoDetect {
|
||||||
// if we find the llama.cpp variants, show them as a single backend (llama-cpp) as later we are going to pick that up
|
// if we find the llama.cpp variants, show them as a single backend (llama-cpp) as later we are going to pick that up
|
||||||
// when starting the service
|
// when starting the service
|
||||||
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
|
foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda, foundLCPPHipblas := false, false, false, false, false, false
|
||||||
if _, ok := backends[LLamaCPP]; !ok {
|
if _, ok := backends[LLamaCPP]; !ok {
|
||||||
for _, e := range entry {
|
for _, e := range entry {
|
||||||
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
|
if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
|
||||||
@ -116,6 +117,10 @@ ENTRY:
|
|||||||
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
|
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
|
||||||
foundLCPPCuda = true
|
foundLCPPCuda = true
|
||||||
}
|
}
|
||||||
|
if strings.Contains(e.Name(), LLamaCPPHipblas) && !foundLCPPHipblas {
|
||||||
|
backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPHipblas)
|
||||||
|
foundLCPPHipblas = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -169,6 +174,7 @@ ENTRY:
|
|||||||
// selectGRPCProcess selects the GRPC process to start based on system capabilities
|
// selectGRPCProcess selects the GRPC process to start based on system capabilities
|
||||||
func selectGRPCProcess(backend, assetDir string) string {
|
func selectGRPCProcess(backend, assetDir string) string {
|
||||||
foundCUDA := false
|
foundCUDA := false
|
||||||
|
foundAMDGPU := false
|
||||||
var grpcProcess string
|
var grpcProcess string
|
||||||
|
|
||||||
// Select backend now just for llama.cpp
|
// Select backend now just for llama.cpp
|
||||||
@ -195,10 +201,20 @@ func selectGRPCProcess(backend, assetDir string) string {
|
|||||||
log.Info().Msgf("GPU device found but no CUDA backend present")
|
log.Info().Msgf("GPU device found but no CUDA backend present")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if strings.Contains(gpu.String(), "amd") {
|
||||||
|
p := backendPath(assetDir, LLamaCPPHipblas)
|
||||||
|
if _, err := os.Stat(p); err == nil {
|
||||||
|
log.Info().Msgf("[%s] attempting to load with HIPBLAS variant", backend)
|
||||||
|
grpcProcess = p
|
||||||
|
foundAMDGPU = true
|
||||||
|
} else {
|
||||||
|
log.Info().Msgf("GPU device found but no HIPBLAS backend present")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if foundCUDA {
|
if foundCUDA || foundAMDGPU {
|
||||||
return grpcProcess
|
return grpcProcess
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user