feat: Use ubuntu as base for container images, drop deprecated ggml-transformers backends (#1689)
* cleanup backends
* switch image to ubuntu 22.04
* adapt commands for ubuntu
* transformers cleanup
* no contrib on ubuntu
* Change test model to gguf
* ci: disable bark tests (too cpu-intensive)
* cleanup
* refinements
* use intel base image
* Makefile: Add docker targets
* Change test model

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in: parent d0a6a35b55, commit ddd21f1644
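Taken together, the changes let the base image be chosen at build time. A minimal sketch of a local build against the new default base, assuming the repository root as build context (the tag name is illustrative; BASE_IMAGE and IMAGE_TYPE are the Dockerfile arguments this commit wires through):

    docker build \
        --build-arg BASE_IMAGE=ubuntu:22.04 \
        --build-arg IMAGE_TYPE=core \
        -t local-ai:ubuntu-core .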
.github/workflows/image-pr.yml (vendored): 7 changes

@@ -21,6 +21,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -39,6 +40,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -48,6 +50,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -60,6 +63,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -75,9 +79,11 @@ jobs:
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'ubuntu-latest'
+          base-image: "ubuntu:22.04"
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: 'sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
@@ -91,3 +97,4 @@ jobs:
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'ubuntu-latest'
+          base-image: "ubuntu:22.04"
.github/workflows/image.yml (vendored): 18 changes

@@ -25,6 +25,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -44,6 +45,7 @@ jobs:
           ffmpeg: ''
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: ''
           platforms: 'linux/amd64'
           tag-latest: 'false'
@@ -51,6 +53,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -60,6 +63,7 @@ jobs:
           ffmpeg: ''
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -69,6 +73,7 @@ jobs:
           ffmpeg: ''
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
@@ -78,6 +83,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -87,6 +93,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'extras'
           runs-on: 'arc-runner-set'
+          base-image: "ubuntu:22.04"
         - build-type: ''
           #platforms: 'linux/amd64,linux/arm64'
           platforms: 'linux/amd64'
@@ -94,6 +101,7 @@ jobs:
           tag-suffix: ''
           ffmpeg: ''
           image-type: 'extras'
+          base-image: "ubuntu:22.04"
           runs-on: 'arc-runner-set'
   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -107,6 +115,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      base-image: ${{ matrix.base-image }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -121,10 +130,12 @@ jobs:
           tag-suffix: '-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
+          base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f16-core'
           ffmpeg: 'false'
           image-type: 'core'
@@ -132,6 +143,7 @@ jobs:
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f32-core'
           ffmpeg: 'false'
           image-type: 'core'
@@ -139,6 +151,7 @@ jobs:
         - build-type: 'sycl_f16'
           platforms: 'linux/amd64'
           tag-latest: 'false'
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f16-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
@@ -146,6 +159,7 @@ jobs:
         - build-type: 'sycl_f32'
           platforms: 'linux/amd64'
           tag-latest: 'false'
+          base-image: "intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04"
           tag-suffix: '-sycl-f32-ffmpeg-core'
           ffmpeg: 'true'
           image-type: 'core'
@@ -158,6 +172,7 @@ jobs:
           tag-suffix: '-cublas-cuda11-core'
           ffmpeg: ''
           image-type: 'core'
+          base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
         - build-type: 'cublas'
           cuda-major-version: "12"
@@ -167,6 +182,7 @@ jobs:
           tag-suffix: '-cublas-cuda12-core'
           ffmpeg: ''
           image-type: 'core'
+          base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
         - build-type: 'cublas'
           cuda-major-version: "11"
@@ -177,6 +193,7 @@ jobs:
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'ubuntu-latest'
+          base-image: "ubuntu:22.04"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -186,3 +203,4 @@ jobs:
           ffmpeg: 'true'
           image-type: 'core'
           runs-on: 'ubuntu-latest'
+          base-image: "ubuntu:22.04"
.github/workflows/image_build.yml (vendored): 6 changes

@@ -4,6 +4,11 @@ name: 'build container images (reusable)'
 on:
   workflow_call:
     inputs:
+      base-image:
+        description: 'Base image'
+        required: false
+        default: ''
+        type: string
       build-type:
         description: 'Build type'
         default: ''
@@ -154,6 +159,7 @@ jobs:
             CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
             FFMPEG=${{ inputs.ffmpeg }}
             IMAGE_TYPE=${{ inputs.image-type }}
+            BASE_IMAGE=${{ inputs.base-image }}
           context: .
           file: ./Dockerfile
           platforms: ${{ inputs.platforms }}
.github/workflows/test-extra.yml (vendored): 132 changes

@@ -164,74 +164,74 @@ jobs:
(The entire tests-bark job is commented out: every line below previously appeared without the leading "#".)
  # tests-bark:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Release space from worker
  #       run: |
  #         echo "Listing top largest packages"
  #         pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
  #         head -n 30 <<< "${pkgs}"
  #         echo
  #         df -h
  #         echo
  #         sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
  #         sudo apt-get remove --auto-remove android-sdk-platform-tools || true
  #         sudo apt-get purge --auto-remove android-sdk-platform-tools || true
  #         sudo rm -rf /usr/local/lib/android
  #         sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
  #         sudo rm -rf /usr/share/dotnet
  #         sudo apt-get remove -y '^mono-.*' || true
  #         sudo apt-get remove -y '^ghc-.*' || true
  #         sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
  #         sudo apt-get remove -y 'php.*' || true
  #         sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
  #         sudo apt-get remove -y '^google-.*' || true
  #         sudo apt-get remove -y azure-cli || true
  #         sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
  #         sudo apt-get remove -y '^gfortran-.*' || true
  #         sudo apt-get remove -y microsoft-edge-stable || true
  #         sudo apt-get remove -y firefox || true
  #         sudo apt-get remove -y powershell || true
  #         sudo apt-get remove -y r-base-core || true
  #         sudo apt-get autoremove -y
  #         sudo apt-get clean
  #         echo
  #         echo "Listing top largest packages"
  #         pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
  #         head -n 30 <<< "${pkgs}"
  #         echo
  #         sudo rm -rfv build || true
  #         sudo rm -rf /usr/share/dotnet || true
  #         sudo rm -rf /opt/ghc || true
  #         sudo rm -rf "/usr/local/share/boost" || true
  #         sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
  #         df -h
  #     - name: Clone
  #       uses: actions/checkout@v4
  #       with:
  #         submodules: true
  #     - name: Dependencies
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install build-essential ffmpeg
  #         curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
  #         sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
  #         gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
  #         sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list' && \
  #         sudo apt-get update && \
  #         sudo apt-get install -y conda
  #         sudo apt-get install -y ca-certificates cmake curl patch
  #         sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
  #
  #         sudo rm -rfv /usr/bin/conda || true
  #
  #     - name: Test bark
  #       run: |
  #         export PATH=$PATH:/opt/conda/bin
  #         make -C backend/python/bark
  #         make -C backend/python/bark test

 # Below tests needs GPU. Commented out for now
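The bark test can still be exercised by hand; a sketch following the commented-out recipe above (assuming conda is installed under /opt/conda):

    export PATH=$PATH:/opt/conda/bin
    make -C backend/python/bark
    make -C backend/python/bark test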
Dockerfile: 29 changes

@@ -1,9 +1,11 @@
-ARG GO_VERSION=1.21-bullseye
+ARG GO_VERSION=1.21
 ARG IMAGE_TYPE=extras
+ARG BASE_IMAGE=ubuntu:22.04
+
 # extras or core

-FROM golang:$GO_VERSION as requirements-core
+FROM ${BASE_IMAGE} as requirements-core
+
+ARG GO_VERSION=1.21.7

 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
@@ -11,14 +13,17 @@ ARG TARGETARCH
 ARG TARGETVARIANT

 ENV BUILD_TYPE=${BUILD_TYPE}
+ENV DEBIAN_FRONTEND=noninteractive
 ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"

 ARG GO_TAGS="stablediffusion tinydream tts"

 RUN apt-get update && \
-    apt-get install -y ca-certificates curl patch pip cmake && apt-get clean
+    apt-get install -y ca-certificates curl patch pip cmake git && apt-get clean
+
+# Install Go
+RUN curl -L -s https://go.dev/dl/go$GO_VERSION.linux-$TARGETARCH.tar.gz | tar -v -C /usr/local -xz
+ENV PATH $PATH:/usr/local/go/bin

 COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
 RUN update-ca-certificates
@@ -30,21 +35,13 @@ RUN echo "Target Variant: $TARGETVARIANT"
 # CuBLAS requirements
 RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     apt-get install -y software-properties-common && \
-    apt-add-repository contrib && \
-    curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
-    dpkg -i cuda-keyring_1.0-1_all.deb && \
-    rm -f cuda-keyring_1.0-1_all.deb && \
+    curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    rm -f cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && apt-get clean \
    ; fi

-# oneapi requirements
-RUN if [ "${BUILD_TYPE}" = "sycl_f16" ] || [ "${BUILD_TYPE}" = "sycl_f32" ]; then \
-    wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46_offline.sh && \
-    sh ./l_BaseKit_p_2024.0.1.46_offline.sh -a -s --eula accept && \
-    rm -rf l_BaseKit_p_2024.0.1.46_offline.sh \
-    ; fi
-
 ENV PATH /usr/local/cuda/bin:${PATH}

 # OpenBLAS requirements and stable diffusion
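Since the build stage no longer starts from a golang image, Go is fetched explicitly from the official tarball. With the defaults above (GO_VERSION=1.21.7) on an amd64 builder (TARGETARCH=amd64), the install step expands to:

    curl -L -s https://go.dev/dl/go1.21.7.linux-amd64.tar.gz | tar -v -C /usr/local -xz
    export PATH=$PATH:/usr/local/go/bin   # the ENV line makes this permanent in the image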
Makefile: 67 changes

@@ -14,9 +14,6 @@ CPPLLAMA_VERSION?=1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8

-# go-ggml-transformers version
-GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
-
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=633c5a3485c403cb2520693dc0991a25dace9f0f
@@ -145,7 +142,16 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif

-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface backend-assets/grpc/bert-embeddings backend-assets/grpc/llama backend-assets/grpc/llama-cpp backend-assets/grpc/llama-ggml backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
+ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
+ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
+ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
+ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
+ALL_GRPC_BACKENDS+=$(OPTIONAL_GRPC)

 GRPC_BACKENDS?=$(ALL_GRPC_BACKENDS) $(OPTIONAL_GRPC)

 # If empty, then we build all
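Because GRPC_BACKENDS is a `?=` assignment, the split `+=` list also makes it easy to compile only a subset of backends; a sketch, assuming the usual `make build` entry point:

    GRPC_BACKENDS="backend-assets/grpc/llama-cpp backend-assets/grpc/whisper" make build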
@@ -217,14 +223,6 @@ backend-assets/espeak-ng-data: sources/go-piper
 sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a

-## CEREBRAS GPT
-sources/go-ggml-transformers:
-	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp sources/go-ggml-transformers
-	cd sources/go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
-
-sources/go-ggml-transformers/libtransformers.a: sources/go-ggml-transformers
-	$(MAKE) -C sources/go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
-
 sources/whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git sources/whisper.cpp
 	cd sources/whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@@ -252,12 +250,11 @@ sources/go-piper/libpiper_binding.a: sources/go-piper
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp

-get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/go-ggml-transformers sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: backend/cpp/llama/llama.cpp sources/go-llama sources/go-llama-ggml sources/gpt4all sources/go-piper sources/go-rwkv sources/whisper.cpp sources/go-bert sources/go-stable-diffusion sources/go-tiny-dream
 	touch $@

 replace:
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(CURDIR)/sources/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
@@ -276,7 +273,6 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C sources/go-llama clean
 	$(MAKE) -C sources/go-llama-ggml clean
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
-	$(MAKE) -C sources/go-ggml-transformers clean
 	$(MAKE) -C sources/go-rwkv clean
 	$(MAKE) -C sources/whisper.cpp clean
 	$(MAKE) -C sources/go-stable-diffusion clean
@@ -321,7 +317,7 @@ run: prepare ## run local-ai
 test-models/testmodel:
 	mkdir test-models
 	mkdir test-dir
-	wget -q https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
+	wget -q https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_0.bin -O test-models/testmodel
 	wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
 	wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
 	wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
@@ -505,26 +501,6 @@ backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all sources/
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/

-backend-assets/grpc/dolly: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./backend/go/llm/dolly/
-
-backend-assets/grpc/gptj: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./backend/go/llm/gptj/
-
-backend-assets/grpc/gptneox: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./backend/go/llm/gptneox/
-
-backend-assets/grpc/mpt: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./backend/go/llm/mpt/
-
-backend-assets/grpc/replit: backend-assets/grpc sources/go-ggml-transformers/libtransformers.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-ggml-transformers LIBRARY_PATH=$(CURDIR)/sources/go-ggml-transformers \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./backend/go/llm/replit/
-
 backend-assets/grpc/rwkv: backend-assets/grpc sources/go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv LIBRARY_PATH=$(CURDIR)/sources/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
@@ -556,3 +532,22 @@ backend-assets/grpc/whisper: backend-assets/grpc sources/whisper.cpp/libwhisper.
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/

 grpcs: prepare $(GRPC_BACKENDS)
+
+DOCKER_IMAGE?=local-ai
+IMAGE_TYPE?=core
+BASE_IMAGE?=ubuntu:22.04
+
+docker:
+	docker build \
+		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS=$(GO_TAGS) \
+		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
+		-t $(DOCKER_IMAGE) .
+
+docker-image-intel:
+	docker build \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
+		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
+		--build-arg GO_TAGS="none" \
+		--build-arg BUILD_TYPE=sycl_f16 -t $(DOCKER_IMAGE) .
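The new targets wrap a plain `docker build`; intended usage is along these lines (the variable values are illustrative overrides of the `?=` defaults above):

    make docker IMAGE_TYPE=core DOCKER_IMAGE=local-ai:core
    make docker-image-intel DOCKER_IMAGE=local-ai:sycl-f16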
API test suite:

@@ -29,6 +29,15 @@ import (
 	"github.com/sashabaranov/go-openai/jsonschema"
 )

+const testPrompt = `### System:
+You are an AI assistant that follows instruction extremely well. Help as much as you can.
+
+### User:
+
+Can you help rephrasing sentences?
+
+### Response:`
+
 type modelApplyRequest struct {
 	ID  string `json:"id"`
 	URL string `json:"url"`
@@ -629,28 +638,28 @@ var _ = Describe("API test", func() {
 			Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
 		})
 		It("can generate completions", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: testPrompt})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})

 		It("can generate chat completions ", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 		})

 		It("can generate completions from model configs", func() {
-			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})

 		It("can generate chat completions from model configs", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
@@ -658,7 +667,7 @@ var _ = Describe("API test", func() {

 		It("returns errors", func() {
 			backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
-			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 			Expect(err).To(HaveOccurred())
 			Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
 		})
@@ -834,13 +843,13 @@ var _ = Describe("API test", func() {
 			app.Shutdown()
 		})
 		It("can generate chat completions from config file (list1)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
 		})
 		It("can generate chat completions from config file (list2)", func() {
-			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "abcdedfghikl"}}})
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{{Role: "user", Content: testPrompt}}})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
Deleted ggml-transformers backend wrappers:

@@ -1,44 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Dolly struct {
-	base.SingleThread
-
-	dolly *transformers.Dolly
-}
-
-func (llm *Dolly) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewDolly(opts.ModelFile)
-	llm.dolly = model
-	return err
-}
-
-func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
-
-	go func() {
-		res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-
-	return nil
-}
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPT2 struct {
-	base.SingleThread
-
-	gpt2 *transformers.GPT2
-}
-
-func (llm *GPT2) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.New(opts.ModelFile)
-	llm.gpt2 = model
-	return err
-}
-
-func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPTJ struct {
-	base.SingleThread
-
-	gptj *transformers.GPTJ
-}
-
-func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTJ(opts.ModelFile)
-	llm.gptj = model
-	return err
-}
-
-func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type GPTNeoX struct {
-	base.SingleThread
-
-	gptneox *transformers.GPTNeoX
-}
-
-func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTNeoX(opts.ModelFile)
-	llm.gptneox = model
-	return err
-}
-
-func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type MPT struct {
-	base.SingleThread
-
-	mpt *transformers.MPT
-}
-
-func (llm *MPT) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewMPT(opts.ModelFile)
-	llm.mpt = model
-	return err
-}
-
-func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
@@ -1,26 +0,0 @@
-package transformers
-
-import (
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-func buildPredictOptions(opts *pb.PredictOptions) []transformers.PredictOption {
-	predictOptions := []transformers.PredictOption{
-		transformers.SetTemperature(float64(opts.Temperature)),
-		transformers.SetTopP(float64(opts.TopP)),
-		transformers.SetTopK(int(opts.TopK)),
-		transformers.SetTokens(int(opts.Tokens)),
-		transformers.SetThreads(int(opts.Threads)),
-	}
-
-	if opts.Batch != 0 {
-		predictOptions = append(predictOptions, transformers.SetBatch(int(opts.Batch)))
-	}
-
-	if opts.Seed != 0 {
-		predictOptions = append(predictOptions, transformers.SetSeed(int(opts.Seed)))
-	}
-
-	return predictOptions
-}
@@ -1,42 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Replit struct {
-	base.SingleThread
-
-	replit *transformers.Replit
-}
-
-func (llm *Replit) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewReplit(opts.ModelFile)
-	llm.replit = model
-	return err
-}
-
-func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-	return nil
-}
@@ -1,43 +0,0 @@
-package transformers
-
-// This is a wrapper to statisfy the GRPC service interface
-// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
-import (
-	"fmt"
-
-	"github.com/go-skynet/LocalAI/pkg/grpc/base"
-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-
-	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
-)
-
-type Starcoder struct {
-	base.SingleThread
-
-	starcoder *transformers.Starcoder
-}
-
-func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewStarcoder(opts.ModelFile)
-	llm.starcoder = model
-	return err
-}
-
-func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
-	return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
-}
-
-// fallback to Predict
-func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
-	go func() {
-		res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
-
-		if err != nil {
-			fmt.Println("err: ", err)
-		}
-		results <- res
-		close(results)
-	}()
-
-	return nil
-}
Entrypoint script:

@@ -13,10 +13,6 @@ if [ -n "$EXTRA_BACKENDS" ]; then
     done
 fi

-if [ -e "/opt/intel/oneapi/setvars.sh" ]; then
-    source /opt/intel/oneapi/setvars.sh
-fi
-
 if [ "$REBUILD" != "false" ]; then
     rm -rf ./local-ai
     make build -j${BUILD_PARALLELISM:-1}
Backend constants and autoload list:

@@ -23,11 +23,6 @@ const (
 	GoLlamaBackend = "llama"
 	LlamaGGML      = "llama-ggml"
 	LLamaCPP       = "llama-cpp"
-	GPTJBackend    = "gptj"
-	DollyBackend   = "dolly"
-	MPTBackend     = "mpt"
-	GPTNeoXBackend = "gptneox"
-	ReplitBackend  = "replit"
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend   = "gpt4all-mpt"
 	Gpt4AllJBackend     = "gpt4all-j"
@@ -50,12 +45,7 @@ var AutoLoadBackends []string = []string{
 	LlamaGGML,
 	GoLlamaBackend,
 	Gpt4All,
-	GPTNeoXBackend,
 	BertEmbeddingsBackend,
-	GPTJBackend,
-	DollyBackend,
-	MPTBackend,
-	ReplitBackend,
 	RwkvBackend,
 	WhisperBackend,
 	StableDiffusionBackend,
Test model config fixtures (context_size raised from 10 to 200):

@@ -4,7 +4,7 @@
   top_p: 80
   top_k: 0.9
   temperature: 0.1
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"
@@ -20,7 +20,7 @@
   top_k: 0.9
   temperature: 0.1
 model: testmodel
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"
@@ -4,7 +4,7 @@ parameters:
   top_p: 80
   top_k: 0.9
   temperature: 0.1
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"
@@ -4,7 +4,7 @@ parameters:
   top_p: 80
   top_k: 0.9
   temperature: 0.1
-context_size: 10
+context_size: 200
 stopwords:
 - "HUMAN:"
 - "### Response:"