From 6c087ae743000fbc0f9c5c747df5655e2fdf72b7 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sun, 9 Jun 2024 15:11:37 +0200
Subject: [PATCH] feat(arm64): enable single-binary builds (#2490)

* ci: try to build for arm64

Signed-off-by: Ettore Di Giacinto

* Allow to skip hipblas on make dist

Signed-off-by: Ettore Di Giacinto

* use arm64 cross compiler

Signed-off-by: Ettore Di Giacinto

* correctly target go arm64

Signed-off-by: Ettore Di Giacinto

* create a separate target

Signed-off-by: Ettore Di Giacinto

* cross-compile grpc

Signed-off-by: Ettore Di Giacinto

* Add Protobuf include dirs

Signed-off-by: Ettore Di Giacinto

* temp disable CUDA build

Signed-off-by: Ettore Di Giacinto

* aarch64 builds: Reduce backends

Signed-off-by: Ettore Di Giacinto

* Even less backends

Signed-off-by: Ettore Di Giacinto

* Even less backends

Signed-off-by: Ettore Di Giacinto

* feat(startup): allow to load libs from extracted assets

Signed-off-by: Ettore Di Giacinto

* makefile: set arch

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
Review notes (ignored by git am):
 - the generated CMake toolchain file now declares CMAKE_SYSTEM_PROCESSOR
   as aarch64, matching the aarch64-linux-gnu compilers it points at
 - ExtractFiles probes "backend-assets/lib" (the spelling the Makefile
   uses) in addition to the previous "backend_assets/lib"
 - dist-cross-linux-arm64 is declared .PHONY

 .github/workflows/release.yaml | 98 ++++++++++++++++++++++++++++++++++
 Makefile                       | 17 +++++-
 pkg/assets/extract.go          | 20 +++++++
 3 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 781a66e0..25357ce5 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -15,6 +15,104 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+
+  build-linux-arm:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
+          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+      - name: Install CUDA Dependencies
+        run: |
+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
+        env:
+          CUDA_VERSION: 12-4
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v4
+        with:
+          path: grpc
+          key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
+
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make --jobs 5 --output-sync=target
+      # Installs the gRPC build from cmake/build, then configures and installs a second build in cmake/cross_build using a generated aarch64 toolchain file.
+      - name: Install gRPC
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+
+          # https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
+          echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_SYSTEM_PROCESSOR aarch64)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
+          GRPC_DIR=$PWD/grpc
+          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
+          GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
+          mkdir -p $GRPC_CROSS_BUILD_DIR && \
+          cd $GRPC_CROSS_BUILD_DIR && \
+          cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
+                -DCMAKE_BUILD_TYPE=Release \
+                -DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
+                ../.. && \
+          sudo make -j`nproc` install
+      - name: Build
+        id: build
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+          export PATH=$PATH:$GOPATH/bin
+          export PATH=/usr/local/cuda/bin:$PATH
+          GO_TAGS=p2p GOOS=linux GOARCH=arm64 CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-linux-arm64
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+
   build-linux:
     runs-on: arc-runner-set
     steps:
diff --git a/Makefile b/Makefile
index 98633053..9bda2b5f 100644
--- a/Makefile
+++ b/Makefile
@@ -324,7 +324,7 @@ build-api:
 dist:
 	STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
 ifeq ($(OS),Darwin)
-	$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
+	$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
 else
 	$(MAKE) backend-assets/grpc/llama-cpp-cuda
 	$(MAKE) backend-assets/grpc/llama-cpp-hipblas
@@ -342,6 +342,21 @@ else
 	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
 endif
 
+# Runs `build` with a reduced GRPC_BACKENDS list and -DLLAMA_NATIVE=off, then copies the binary and its sha256 into release/ under an arm64 name
+.PHONY: dist-cross-linux-arm64
+dist-cross-linux-arm64:
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+	$(MAKE) build
+	mkdir -p release
+# if BUILD_ID is empty, then we don't append it to the binary name
+ifeq ($(BUILD_ID),)
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
+	shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
+else
+	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
+	shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
+endif
+
 osx-signed: build
 	codesign --deep --force --sign "$(OSX_SIGNING_IDENTITY)" --entitlements "./Entitlements.plist" "./$(BINARY_NAME)"
 
diff --git a/pkg/assets/extract.go b/pkg/assets/extract.go
index 8f668a1a..7727edfa 100644
--- a/pkg/assets/extract.go
+++ b/pkg/assets/extract.go
@@ -51,5 +51,25 @@ func ExtractFiles(content embed.FS, extractDir string) error {
 		return nil
 	})
 
+	// If a lib directory was extracted, append it to LD_LIBRARY_PATH so that
+	// libraries carried in the embedded FS (e.g. Nvidia CUDA libraries) can be
+	// picked up from the target directory. Both asset dir spellings are probed.
+
+	// Skip this if LOCALAI_SKIP_LD_LIBRARY_PATH is set
+	if os.Getenv("LOCALAI_SKIP_LD_LIBRARY_PATH") != "" {
+		return err
+	}
+
+	for _, libDir := range []string{filepath.Join(extractDir, "backend-assets", "lib"), filepath.Join(extractDir, "backend_assets", "lib"), filepath.Join(extractDir, "lib")} {
+		if _, err := os.Stat(libDir); err == nil {
+			ldLibraryPath := os.Getenv("LD_LIBRARY_PATH")
+			if ldLibraryPath == "" {
+				ldLibraryPath = libDir
+			} else {
+				ldLibraryPath = fmt.Sprintf("%s:%s", ldLibraryPath, libDir)
+			}
+			os.Setenv("LD_LIBRARY_PATH", ldLibraryPath)
+		}
+	}
 	return err
 }