From ddd21f1644ea8f6aff2e01f34e2b54b16db8964d Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 8 Feb 2024 20:12:51 +0100
Subject: [PATCH] feat: Use ubuntu as base for container images, drop
 deprecated ggml-transformers backends (#1689)

* cleanup backends

* switch image to ubuntu 22.04

* adapt commands for ubuntu

* transformers cleanup

* no contrib on ubuntu

* Change test model to gguf

* ci: disable bark tests (too cpu-intensive)

Signed-off-by: Ettore Di Giacinto

* cleanup

* refinements

* use intel base image

* Makefile: Add docker targets

* Change test model

---------

Signed-off-by: Ettore Di Giacinto
---
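Build notes (text placed here, between the `---` marker and the diffstat, is ignored by `git am`): a quick sketch of how the new Makefile targets and Dockerfile build arguments introduced below fit together. The `local-ai` tag comes from the Makefile's DOCKER_IMAGE default; the `--device=/dev/dri` run flag and the `sycl-ls` smoke test are illustrative assumptions, not something this patch sets up.

    # Core image on the new default base (BASE_IMAGE ?= ubuntu:22.04):
    make docker

    # Equivalent raw invocation, overriding the base image explicitly:
    docker build \
      --build-arg BASE_IMAGE=ubuntu:22.04 \
      --build-arg IMAGE_TYPE=core \
      -t local-ai .

    # Intel SYCL variant: the oneAPI toolchain now comes from the base image,
    # replacing the offline l_BaseKit installer removed from the Dockerfile:
    make docker-image-intel

    # Hypothetical smoke test for the SYCL image (assumes an Intel GPU
    # exposed through /dev/dri on the host):
    docker run --rm --device=/dev/dri local-ai sycl-ls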
 .github/workflows/image-pr.yml            |   7 ++
 .github/workflows/image.yml               |  18 ++++
 .github/workflows/image_build.yml         |   6 ++
 .github/workflows/test-extra.yml          | 132 +++++++++++------------
 Dockerfile                                |  29 +++--
 Makefile                                  |  67 ++++++------
 api/api_test.go                           |  23 ++--
 backend/go/llm/transformers/dolly.go      |  44 --------
 backend/go/llm/transformers/gpt2.go       |  42 --------
 backend/go/llm/transformers/gptj.go       |  42 --------
 backend/go/llm/transformers/gptneox.go    |  42 --------
 backend/go/llm/transformers/mpt.go        |  42 --------
 backend/go/llm/transformers/predict.go    |  26 -----
 backend/go/llm/transformers/replit.go     |  42 --------
 backend/go/llm/transformers/starcoder.go  |  43 --------
 entrypoint.sh                             |   4 -
 pkg/model/initializers.go                 |  10 --
 tests/models_fixtures/config.yaml         |   4 +-
 tests/models_fixtures/gpt4.yaml           |   2 +-
 tests/models_fixtures/gpt4_2.yaml         |   2 +-
 20 files changed, 161 insertions(+), 466 deletions(-)
 delete mode 100644 backend/go/llm/transformers/dolly.go
 delete mode 100644 backend/go/llm/transformers/gpt2.go
 delete mode 100644 backend/go/llm/transformers/gptj.go
 delete mode 100644 backend/go/llm/transformers/gptneox.go
 delete mode 100644 backend/go/llm/transformers/mpt.go
 delete mode 100644 backend/go/llm/transformers/predict.go
 delete mode 100644 backend/go/llm/transformers/replit.go
 delete mode 100644 backend/go/llm/transformers/starcoder.go

diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 8dd699f5..ae8bd070 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -21,6 +21,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -39,6 +40,7 @@
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -48,6 +50,7 @@
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
+            base-image: "ubuntu:22.04"
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -60,6 +63,7 @@
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -75,9 +79,11 @@
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
+            base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
             tag-suffix: 'sycl-f16-ffmpeg-core'
             ffmpeg: 'true'
             image-type: 'core'
@@ -91,3 +97,4 @@
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:22.04"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index c23cdabf..ac61deec 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -25,6 +25,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -44,6 +45,7 @@
            ffmpeg: ''
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: ''
            platforms: 'linux/amd64'
            tag-latest: 'false'
@@ -51,6 +53,7 @@
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -60,6 +63,7 @@
            ffmpeg: ''
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
@@ -69,6 +73,7 @@
            ffmpeg: ''
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "11"
            cuda-minor-version: "7"
@@ -78,6 +83,7 @@
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
@@ -87,6 +93,7 @@
            ffmpeg: 'true'
            image-type: 'extras'
            runs-on: 'arc-runner-set'
+           base-image: "ubuntu:22.04"
          - build-type: ''
            #platforms: 'linux/amd64,linux/arm64'
            platforms: 'linux/amd64'
@@ -94,6 +101,7 @@
            tag-suffix: ''
            ffmpeg: ''
            image-type: 'extras'
+           base-image: "ubuntu:22.04"
            runs-on: 'arc-runner-set'
   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -107,6 +115,7 @@
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -121,10 +130,12 @@
            tag-suffix: '-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
+           base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
+           base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f16-core'
            ffmpeg: 'false'
            image-type: 'core'
@@ -132,6 +143,7 @@
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'false'
+           base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f32-core'
            ffmpeg: 'false'
            image-type: 'core'
@@ -139,6 +151,7 @@
          - build-type: 'sycl_f16'
            platforms: 'linux/amd64'
            tag-latest: 'false'
+           base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f16-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
@@ -146,6 +159,7 @@
          - build-type: 'sycl_f32'
            platforms: 'linux/amd64'
            tag-latest: 'false'
+           base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
            tag-suffix: '-sycl-f32-ffmpeg-core'
            ffmpeg: 'true'
            image-type: 'core'
@@ -158,6 +172,7 @@
            tag-suffix: '-cublas-cuda11-core'
            ffmpeg: ''
            image-type: 'core'
+           base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
          - build-type: 'cublas'
            cuda-major-version: "12"
@@ -167,6 +182,7 @@
            tag-suffix: '-cublas-cuda12-core'
            ffmpeg: ''
            image-type: 'core'
+           base-image: "ubuntu:22.04"
            runs-on: 'ubuntu-latest'
          - build-type: 'cublas'
            cuda-major-version: "11"
@@ -177,6 +193,7 @@
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
+           base-image: "ubuntu:22.04"
          - build-type: 'cublas'
            cuda-major-version: "12"
            cuda-minor-version: "1"
@@ -186,3 +203,4 @@
            ffmpeg: 'true'
            image-type: 'core'
            runs-on: 'ubuntu-latest'
+           base-image: "ubuntu:22.04"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index ff9b751f..a45473b4 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -4,6 +4,11 @@ name: 'build container images (reusable)'
 on:
   workflow_call:
     inputs:
+      base-image:
+        description: 'Base image'
+        required: false
+        default: ''
+        type: string
       build-type:
         description: 'Build type'
         default: ''
@@ -154,6 +159,7 @@ jobs:
             CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
             FFMPEG=${{ inputs.ffmpeg }}
             IMAGE_TYPE=${{ inputs.image-type }}
+            BASE_IMAGE=${{ inputs.base-image }}
           context: .
           file: ./Dockerfile
           platforms: ${{ inputs.platforms }}
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index b1ecec25..68da2c56 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -164,74 +164,74 @@
-  tests-bark:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Release space from worker
-      run: |
-        echo "Listing top largest packages"
-        pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-        head -n 30 <<< "${pkgs}"
-        echo
-        df -h
-        echo
-        sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-        sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-        sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-        sudo rm -rf /usr/local/lib/android
-        sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-        sudo rm -rf /usr/share/dotnet
-        sudo apt-get remove -y '^mono-.*' || true
-        sudo apt-get remove -y '^ghc-.*' || true
-        sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-        sudo apt-get remove -y 'php.*' || true
-        sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-        sudo apt-get remove -y '^google-.*' || true
-        sudo apt-get remove -y azure-cli || true
-        sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-        sudo apt-get remove -y '^gfortran-.*' || true
-        sudo apt-get remove -y microsoft-edge-stable || true
-        sudo apt-get remove -y firefox || true
-        sudo apt-get remove -y powershell || true
-        sudo apt-get remove -y r-base-core || true
-        sudo apt-get autoremove -y
-        sudo apt-get clean
-        echo
-        echo "Listing top largest packages"
-        pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-        head -n 30 <<< "${pkgs}"
-        echo
-        sudo rm -rfv build || true
-        sudo rm -rf /usr/share/dotnet || true
-        sudo rm -rf /opt/ghc || true
-        sudo rm -rf "/usr/local/share/boost" || true
-        sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
-        df -h
-    - name: Clone
-      uses: actions/checkout@v4
-      with:
-        submodules: true
-    - name: Dependencies
-      run: |
-        sudo apt-get update
-        sudo apt-get install build-essential ffmpeg
-        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
-        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
-        gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
-        sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
-        sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
-        sudo apt-get update && \
-        sudo apt-get install -y conda
-        sudo apt-get install -y ca-certificates cmake curl patch
-        sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
+  # tests-bark:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - name: Release space from worker
+  #     run: |
+  #       echo "Listing top largest packages"
+  #       pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+  #       head -n 30 <<< "${pkgs}"
+  #       echo
+  #       df -h
+  #       echo
+  #       sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
+  #       sudo apt-get remove --auto-remove android-sdk-platform-tools || true
+  #       sudo apt-get purge --auto-remove android-sdk-platform-tools || true
+  #       sudo rm -rf /usr/local/lib/android
+  #       sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
+  #       sudo rm -rf /usr/share/dotnet
+  #       sudo apt-get remove -y '^mono-.*' || true
+  #       sudo apt-get remove -y '^ghc-.*' || true
+  #       sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
+  #       sudo apt-get remove -y 'php.*' || true
+  #       sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
+  #       sudo apt-get remove -y '^google-.*' || true
+  #       sudo apt-get remove -y azure-cli || true
+  #       sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
+  #       sudo apt-get remove -y '^gfortran-.*' || true
+  #       sudo apt-get remove -y microsoft-edge-stable || true
+  #       sudo apt-get remove -y firefox || true
+  #       sudo apt-get remove -y powershell || true
+  #       sudo apt-get remove -y r-base-core || true
+  #       sudo apt-get autoremove -y
+  #       sudo apt-get clean
+  #       echo
+  #       echo "Listing top largest packages"
+  #       pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
+  #       head -n 30 <<< "${pkgs}"
+  #       echo
+  #       sudo rm -rfv build || true
+  #       sudo rm -rf /usr/share/dotnet || true
+  #       sudo rm -rf /opt/ghc || true
+  #       sudo rm -rf "/usr/local/share/boost" || true
+  #       sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
+  #       df -h
+  #   - name: Clone
+  #     uses: actions/checkout@v4
+  #     with:
+  #       submodules: true
+  #   - name: Dependencies
+  #     run: |
+  #       sudo apt-get update
+  #       sudo apt-get install build-essential ffmpeg
+  #       curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
+  #       sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
+  #       gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
+  #       sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
+  #       sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
+  #       sudo apt-get update && \
+  #       sudo apt-get install -y conda
+  #       sudo apt-get install -y ca-certificates cmake curl patch
+  #       sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 
-        sudo rm -rfv /usr/bin/conda || true
+  #       sudo rm -rfv /usr/bin/conda || true
 
-    - name: Test bark
-      run: |
-        export PATH=$PATH:/opt/conda/bin
-        make -C backend/python/bark
-        make -C backend/python/bark test
+  #   - name: Test bark
+  #     run: |
+  #       export PATH=$PATH:/opt/conda/bin
+  #       make -C backend/python/bark
+  #       make -C backend/python/bark test
 
 
 # Below tests needs GPU. Commented out for now
diff --git a/Dockerfile b/Dockerfile
index f81b5ee3..5cee6a23 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,11 @@
-ARG GO_VERSION=1.21-bullseye
+ARG GO_VERSION=1.21
 ARG IMAGE_TYPE=extras
+ARG BASE_IMAGE=ubuntu:22.04
+
 # extras or core
+FROM ${BASE_IMAGE} as requirements-core
-FROM golang:$GO_VERSION as requirements-core
-
+ARG GO_VERSION=1.21.7
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
@@ -11,14 +13,17 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
-
+ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
 
 ARG GO_TAGS="stablediffusion tinydream tts"
 
 RUN apt-get update && \
-    apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
+    apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
 
+# Install Go
+RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
+ENV PATH $PATH:/usr/local/go/bin
 
 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@@ -30,21 +35,13 @@ RUN echo "Target Variant: $TARGETVARIANT"
 
 # CuBLAS requirements
 RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     apt-get install -y software-properties-common && \
-    apt-add-repository contrib && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
-    dpkg -i cuda-keyring_1.0-1_all.deb && \
-    rm -f cuda-keyring_1.0-1_all.deb && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
     apt-get update && \
     apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
     ; fi
 
-# oneapi requirements
-RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
-    wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
-    sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
-    rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
-    ; fi
-
 ENV PATH /usr/local/cuda/bin:${PATH}
 
 # OpenBLAS requirements and stable diffusion
diff --git a/Makefile b/Makefile
index a6890759..51c941d4 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,6 @@ CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
 
-# go-ggml-transformers version
-GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
-
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f
@@ -145,7 +142,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
+ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
+ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
+ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
+ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)
+
 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)
 
 # If empty, then we build all
@@ -217,14 +223,6 @@ backend-assets/espeak-ng-data: sources/go-piper
 sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
 
-## CEREBRAS GPT
-sources/go-ggml-transformers:
-	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers
-	cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers
-	$(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
-
 sources/whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@@ -252,12 +250,11 @@ sources/go-piper/libpiper_binding.a: sources/go-piper
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
 
-get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
 	touch $@
 
 replace:
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(CURDIR)/sources/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
@@ -276,7 +273,6 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C sources/go-llama clean
 	$(MAKE) -C sources/go-llama-ggml clean
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
-	$(MAKE) -C sources/go-ggml-transformers clean
 	$(MAKE) -C sources/go-rwkv clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
@@ -321,7 +317,7 @@ run: prepare ## run local-ai
 test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
+	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -505,26 +501,6 @@ backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
 
-backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/
-
-backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/
-
-backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/
-
-backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/
-
-backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/
-
 backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
@@ -556,3 +532,22 @@ backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.a
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
 
 grpcs: prepare $(GRPC_BACKENDS)
+
+DOCKER_IMAGE?=local-ai
+IMAGE_TYPE?=core
+BASE_IMAGE?=ubuntu:22.04
+
+docker:
+	docker build \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS=$(GO_TAGS) \
+		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
+		-t $(DOCKER_IMAGE) .
+
+docker-image-intel:
+	docker build \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="none" \
+		--build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) .
\ No newline at end of file
diff --git a/api/api_test.go b/api/api_test.go
index 491a56b5..04d2d6fe 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -29,6 +29,15 @@ import (
 	"github.com/sashabaranov/go-openai/jsonschema"
 )
 
+const testPrompt = `### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+
+### User:
+
+Can you help rephrasing sentences?
+
+### Response:`
+
 type modelApplyRequest struct {
 	ID  string `json:"id"`
 	URL string `json:"url"`
@@ -629,28 +638,28 @@ var _ = Describe("API test", func() {
 				Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
 			})
 			It("can generate completions", func() {
-				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
+				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 			})
 
 			It("can generate chat completions ", func() {
-				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 			})
 
 			It("can generate completions from model configs", func() {
-				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
+				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 			})
 
 			It("can generate chat completions from model configs", func() {
-				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
@@ -658,7 +667,7 @@
 			It("returns errors", func() {
 				backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
-				_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+				_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 				Expect(err).To(HaveOccurred())
 				Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
 			})
@@ -834,13 +843,13 @@
 			app.Shutdown()
 		})
 		It("can generate chat completions from config file (list1)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 		})
 		It("can generate chat completions from config file (list2)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
diff --git a/backend/go/llm/transformers/dolly.go b/backend/go/llm/transformers/dolly.go
deleted file mode 100644
index b3579b04..00000000
--- a/backend/go/llm/transformers/dolly.go
+++ /dev/null
@@ -1,44 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Dolly struct {
-	base.SingleThread
-
-	dolly *transformers.Dolly
-}
-
-func (llm *Dolly) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewDolly(opts.ModelFile)
-	llm.dolly = model
-	return err
-}
-
-func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
-
-	go func() {
-		res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-
-	return nil
-}
diff --git a/backend/go/llm/transformers/gpt2.go b/backend/go/llm/transformers/gpt2.go
deleted file mode 100644
index ab162a76..00000000
--- a/backend/go/llm/transformers/gpt2.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPT2 struct {
-	base.SingleThread
-
-	gpt2 *transformers.GPT2
-}
-
-func (llm *GPT2) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.New(opts.ModelFile)
-	llm.gpt2 = model
-	return err
-}
-
-func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
diff --git a/backend/go/llm/transformers/gptj.go b/backend/go/llm/transformers/gptj.go
deleted file mode 100644
index f00f1044..00000000
--- a/backend/go/llm/transformers/gptj.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPTJ struct {
-	base.SingleThread
-
-	gptj *transformers.GPTJ
-}
-
-func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTJ(opts.ModelFile)
-	llm.gptj = model
-	return err
-}
-
-func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
diff --git a/backend/go/llm/transformers/gptneox.go b/backend/go/llm/transformers/gptneox.go
deleted file mode 100644
index a06d910e..00000000
--- a/backend/go/llm/transformers/gptneox.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPTNeoX struct {
-	base.SingleThread
-
-	gptneox *transformers.GPTNeoX
-}
-
-func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTNeoX(opts.ModelFile)
-	llm.gptneox = model
-	return err
-}
-
-func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
diff --git a/backend/go/llm/transformers/mpt.go b/backend/go/llm/transformers/mpt.go
deleted file mode 100644
index f6e0a143..00000000
--- a/backend/go/llm/transformers/mpt.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type MPT struct {
-	base.SingleThread
-
-	mpt *transformers.MPT
-}
-
-func (llm *MPT) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewMPT(opts.ModelFile)
-	llm.mpt = model
-	return err
-}
-
-func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
diff --git a/backend/go/llm/transformers/predict.go b/backend/go/llm/transformers/predict.go
deleted file mode 100644
index 861d1196..00000000
--- a/backend/go/llm/transformers/predict.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package transformers
-
-import (
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption {
-	predictOptions := []transformers.PredictOption{
-		transformers.SetTemperature(float64(opts.Temperature)),
-		transformers.SetTopP(float64(opts.TopP)),
-		transformers.SetTopK(int(opts.TopK)),
-		transformers.SetTokens(int(opts.Tokens)),
-		transformers.SetThreads(int(opts.Threads)),
-	}
-
-	if opts.Batch != 0 {
-		predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch)))
-	}
-
-	if opts.Seed != 0 {
-		predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed)))
-	}
-
-	return predictOptions
-}
diff --git a/backend/go/llm/transformers/replit.go b/backend/go/llm/transformers/replit.go
deleted file mode 100644
index a979edcb..00000000
--- a/backend/go/llm/transformers/replit.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Replit struct {
-	base.SingleThread
-
-	replit *transformers.Replit
-}
-
-func (llm *Replit) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewReplit(opts.ModelFile)
-	llm.replit = model
-	return err
-}
-
-func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
diff --git a/backend/go/llm/transformers/starcoder.go b/backend/go/llm/transformers/starcoder.go
deleted file mode 100644
index 25a758a0..00000000
--- a/backend/go/llm/transformers/starcoder.go
+++ /dev/null
@@ -1,43 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Starcoder struct {
-	base.SingleThread
-
-	starcoder *transformers.Starcoder
-}
-
-func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewStarcoder(opts.ModelFile)
-	llm.starcoder = model
-	return err
-}
-
-func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-
-	return nil
-}
diff --git a/entrypoint.sh b/entrypoint.sh
index ae1976af..05f67128 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -13,10 +13,6 @@ if [ -n "$EXTRA_BACKENDS" ]; then
 	done
 fi
 
-if [ -e "/opt/intel/oneapi/setvars.sh" ]; then
-    source /opt/intel/oneapi/setvars.sh
-fi
-
 if [ "$REBUILD" != "false" ]; then
 	rm -rf ./local-ai
 	make build -j${BUILD_PARALLELISM:-1}
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index df0aaf2f..fce44fe1 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -23,11 +23,6 @@ const (
 	GoLlamaBackend      = "llama"
 	LlamaGGML           = "llama-ggml"
 	LLamaCPP            = "llama-cpp"
-	GPTJBackend         = "gptj"
-	DollyBackend        = "dolly"
-	MPTBackend          = "mpt"
-	GPTNeoXBackend      = "gptneox"
-	ReplitBackend       = "replit"
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend   = "gpt4all-mpt"
 	Gpt4AllJBackend     = "gpt4all-j"
@@ -50,12 +45,7 @@ var AutoLoadBackends []string = []string{
 	LlamaGGML,
 	GoLlamaBackend,
 	Gpt4All,
-	GPTNeoXBackend,
 	BertEmbeddingsBackend,
-	GPTJBackend,
-	DollyBackend,
-	MPTBackend,
-	ReplitBackend,
 	RwkvBackend,
 	WhisperBackend,
 	StableDiffusionBackend,
diff --git a/tests/models_fixtures/config.yaml b/tests/models_fixtures/config.yaml
index 3deabf9d..749d1699 100644
--- a/tests/models_fixtures/config.yaml
+++ b/tests/models_fixtures/config.yaml
@@ -4,7 +4,7 @@
     top_p: 80
     top_k: 0.9
     temperature: 0.1
-  context_size: 10
+  context_size: 200
   stopwords:
   - "HUMAN:"
  - "### Response:"
@@ -20,7 +20,7 @@
     top_k: 0.9
     temperature: 0.1
     model: testmodel
-  context_size: 10
+  context_size: 200
   stopwords:
   - "HUMAN:"
  - "### Response:"
diff --git a/tests/models_fixtures/gpt4.yaml b/tests/models_fixtures/gpt4.yaml
index 77b72b30..652a407c 100644
--- a/tests/models_fixtures/gpt4.yaml
+++ b/tests/models_fixtures/gpt4.yaml
@@ -4,7 +4,7 @@ parameters:
   top_p: 80
   top_k: 0.9
   temperature: 0.1
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"
diff --git a/tests/models_fixtures/gpt4_2.yaml b/tests/models_fixtures/gpt4_2.yaml
index 62d9fdbc..904693ca 100644
--- a/tests/models_fixtures/gpt4_2.yaml
+++ b/tests/models_fixtures/gpt4_2.yaml
@@ -4,7 +4,7 @@ parameters:
   top_p: 80
   top_k: 0.9
   temperature: 0.1
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"
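-- 
Migration note (signature area, not applied by `git am`): with GPTJBackend, DollyBackend, MPTBackend, GPTNeoXBackend, and ReplitBackend removed from AutoLoadBackends, model configs that named those backends explicitly need to move to a format a surviving backend understands, typically a gguf model served by `llama-cpp` (the LLamaCPP constant kept in pkg/model/initializers.go). A hypothetical re-configuration sketch; the config path, model name, and gguf file below are placeholders, not taken from this patch:

    # Before (no longer loads after this patch):
    #   name: my-model
    #   backend: gptj
    #   parameters:
    #     model: ggml-gptj.bin
    # After: point the same config at a gguf model and let llama-cpp serve it.
    cat > models/my-model.yaml <<'EOF'
    name: my-model
    backend: llama-cpp
    parameters:
      model: my-model.Q4_0.gguf
    EOF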