Merge branch 'master' into ci/static-check

commit c28e8ca697
Ettore Di Giacinto, 2024-07-18 19:44:59 +02:00, committed by GitHub
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
119 changed files with 4374 additions and 1164 deletions

.github/dependabot.yml (vendored)

@@ -1,6 +1,10 @@
 # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 version: 2
 updates:
+- package-ecosystem: "gitsubmodule"
+  directory: "/"
+  schedule:
+    interval: "weekly"
 - package-ecosystem: "gomod"
 directory: "/"
 schedule:
@@ -23,3 +27,111 @@ updates:
 schedule:
 # Check for updates to GitHub Actions every weekday
 interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/common/template"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/coqui"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/diffusers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/exllama2"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/mamba"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/openvoice"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/sentencetransformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/transformers-musicgen"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vall-e-x"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/vllm"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/chainlit"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/functions"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain/langchainpy-localai-example"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/langchain-chroma"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/examples/streamlit-bot"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/k8sgpt"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/kubernetes"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/langchain"
schedule:
interval: "weekly"
- package-ecosystem: "gomod"
directory: "/examples/semantic-todo"
schedule:
interval: "weekly"
- package-ecosystem: "docker"
directory: "/examples/telegram-bot"
schedule:
interval: "weekly"

View File

@@ -27,9 +27,6 @@ jobs:
 - repository: "go-skynet/bloomz.cpp"
 variable: "BLOOMZ_VERSION"
 branch: "main"
-- repository: "nomic-ai/gpt4all"
-variable: "GPT4ALL_VERSION"
-branch: "main"
 - repository: "mudler/go-ggllm.cpp"
 variable: "GOGGLLM_VERSION"
 branch: "master"
@@ -51,7 +48,7 @@ jobs:
 token: ${{ secrets.UPDATE_BOT_TOKEN }}
 push-to-fork: ci-forks/LocalAI
 commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-title: ':arrow_up: Update ${{ matrix.repository }}'
+title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
 branch: "update/${{ matrix.variable }}"
 body: Bump of ${{ matrix.repository }} version
 signoff: true

View File

@@ -22,7 +22,7 @@ jobs:
 token: ${{ secrets.UPDATE_BOT_TOKEN }}
 push-to-fork: ci-forks/LocalAI
 commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-title: ':arrow_up: Update docs version ${{ matrix.repository }}'
+title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
 branch: "update/docs"
 body: Bump of ${{ matrix.repository }} version inside docs
 signoff: true

View File

@@ -20,12 +20,12 @@ jobs:
 run: |
 sudo apt-get update
 sudo apt-get install -y pip wget
 sudo pip install --upgrade pip
 pip install huggingface_hub
 - name: 'Setup yq'
 uses: dcarbone/install-yq-action@v1.1.1
 with:
-version: 'v4.43.1'
+version: 'v4.44.2'
 download-compressed: true
 force: true

.github/workflows/comment-pr.yaml (new file, vendored)

@@ -0,0 +1,81 @@
name: Comment PRs
on:
pull_request_target:
jobs:
comment-pr:
env:
MODEL_NAME: hermes-2-theta-llama-3-8b
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
ref: "${{ github.event.pull_request.merge_commit_sha }}"
- uses: mudler/localai-github-action@v1
with:
model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
# Check the PR diff using the current branch and the base branch of the PR
- uses: GrantBirki/git-diff-action@v2.7.0
id: git-diff-action
with:
json_diff_file_output: diff.json
raw_diff_file_output: diff.txt
file_output_only: "true"
- name: Show diff
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
run: |
cat $DIFF
- name: Summarize
env:
DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
id: summarize
run: |
input="$(cat $DIFF)"
# Define the LocalAI API endpoint
API_URL="http://localhost:8080/chat/completions"
# Create a JSON payload using jq to handle special characters
json_payload=$(jq -n --arg input "$input" '{
model: "'$MODEL_NAME'",
messages: [
{
role: "system",
content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
},
{
role: "user",
content: $input
}
]
}')
# Send the request to LocalAI
response=$(curl -s -X POST $API_URL \
-H "Content-Type: application/json" \
-d "$json_payload")
# Extract the summary from the response
summary="$(echo $response | jq -r '.choices[0].message.content')"
# Print the summary
# -H "Authorization: Bearer $API_KEY" \
echo "Summary:"
echo "$summary"
echo "payload sent"
echo "$json_payload"
{
echo 'message<<EOF'
echo "$summary"
echo EOF
} >> "$GITHUB_OUTPUT"
docker logs --tail 10 local-ai
- uses: mshick/add-pr-comment@v2
if: always()
with:
repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
message: ${{ steps.summarize.outputs.message }}
message-failure: |
Uh oh! Could not analyze this PR, maybe it's too big?

View File

@@ -14,7 +14,7 @@ jobs:
 steps:
 - name: Dependabot metadata
 id: metadata
-uses: dependabot/fetch-metadata@v2.1.0
+uses: dependabot/fetch-metadata@v2.2.0
 with:
 github-token: "${{ secrets.GITHUB_TOKEN }}"
 skip-commit-verification: true

View File

@@ -75,7 +75,7 @@ jobs:
 uses: actions/checkout@v4
 - name: Cache GRPC
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 with:
 builder: ${{ steps.buildx.outputs.name }}
 # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
 build-args: |
 GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
 GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-GRPC_VERSION=v1.64.0
+GRPC_VERSION=v1.65.0
 context: .
 file: ./Dockerfile
 cache-to: type=gha,ignore-error=true
 cache-from: type=gha
 target: grpc
 platforms: ${{ matrix.platforms }}
 push: false

View File

@@ -15,7 +15,7 @@ jobs:
 strategy:
 matrix:
 include:
-- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
+- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
 runs-on: 'ubuntu-latest'
 platforms: 'linux/amd64'
 runs-on: ${{matrix.runs-on}}
@@ -46,7 +46,7 @@ jobs:
 uses: actions/checkout@v4
 - name: Cache Intel images
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 with:
 builder: ${{ steps.buildx.outputs.name }}
 build-args: |

View File

@@ -215,7 +215,7 @@ jobs:
 password: ${{ secrets.quayPassword }}
 - name: Build and push
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 if: github.event_name != 'pull_request'
 with:
 builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
 BASE_IMAGE=${{ inputs.base-image }}
 GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
 GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-GRPC_VERSION=v1.64.0
+GRPC_VERSION=v1.65.0
 MAKEFLAGS=${{ inputs.makeflags }}
 context: .
 file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
 labels: ${{ steps.meta.outputs.labels }}
 ### Start testing image
 - name: Build and push
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 if: github.event_name == 'pull_request'
 with:
 builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
 BASE_IMAGE=${{ inputs.base-image }}
 GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
 GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-GRPC_VERSION=v1.64.0
+GRPC_VERSION=v1.65.0
 MAKEFLAGS=${{ inputs.makeflags }}
 context: .
 file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
 ## End testing image
 - name: Build and push AIO image
 if: inputs.aio != ''
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 with:
 builder: ${{ steps.buildx.outputs.name }}
 build-args: |
@@ -291,7 +291,7 @@ jobs:
 - name: Build and push AIO image (dockerhub)
 if: inputs.aio != ''
-uses: docker/build-push-action@v5
+uses: docker/build-push-action@v6
 with:
 builder: ${{ steps.buildx.outputs.name }}
 build-args: |

View File

@@ -14,12 +14,10 @@ jobs:
 - uses: actions/checkout@v4
 with:
 fetch-depth: 0 # needed to checkout all branches for this Action to work
-- name: Start LocalAI
-run: |
-echo "Starting LocalAI..."
-docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+- uses: mudler/localai-github-action@v1
+with:
+model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
 # Check the PR diff using the current branch and the base branch of the PR
 - uses: GrantBirki/git-diff-action@v2.7.0
 id: git-diff-action
 with:

View File

@@ -12,11 +12,9 @@ jobs:
 RELEASE_TITLE: ${{ github.event.release.name }}
 RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
 steps:
-- name: Start LocalAI
-run: |
-echo "Starting LocalAI..."
-docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+- uses: mudler/localai-github-action@v1
+with:
+model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
 - name: Summarize
 id: summarize
 run: |

View File

@@ -17,12 +17,12 @@ jobs:
 - uses: aslafy-z/conventional-pr-title-action@v3
 env:
 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-check-pr-description:
-runs-on: ubuntu-latest
-steps:
-- uses: actions/checkout@v2
-- uses: jadrol/pr-description-checker-action@v1.0.0
-id: description-checker
-with:
-repo-token: ${{ secrets.GITHUB_TOKEN }}
-exempt-labels: no qa
+# check-pr-description:
+# runs-on: ubuntu-latest
+# steps:
+# - uses: actions/checkout@v2
+# - uses: jadrol/pr-description-checker-action@v1.0.0
+# id: description-checker
+# with:
+# repo-token: ${{ secrets.GITHUB_TOKEN }}
+# exempt-labels: no qa

View File

@@ -7,7 +7,7 @@ on:
 pull_request:
 env:
-GRPC_VERSION: v1.64.0
+GRPC_VERSION: v1.65.0
 permissions:
 contents: write
@@ -99,8 +99,8 @@ jobs:
 CROSS_TOOLCHAIN=/usr/$GNU_HOST
 CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
 CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 export PATH=$PATH:$GOPATH/bin
 export PATH=/usr/local/cuda/bin:$PATH
 sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
@@ -163,7 +163,7 @@ jobs:
 sudo apt-get update
 sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
 env:
-CUDA_VERSION: 12-3
+CUDA_VERSION: 12-5
 - name: "Install Hipblas"
 env:
 ROCM_VERSION: "6.1"
@@ -210,8 +210,8 @@ jobs:
 - name: Build
 id: build
 run: |
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 export PATH=$PATH:$GOPATH/bin
 export PATH=/usr/local/cuda/bin:$PATH
 export PATH=/opt/rocm/bin:$PATH
@@ -251,8 +251,8 @@ jobs:
 run: |
 sudo apt-get update
 sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 - name: Build stablediffusion
 run: |
 export PATH=$PATH:$GOPATH/bin
@@ -327,8 +327,8 @@ jobs:
 - name: Dependencies
 run: |
 brew install protobuf grpc
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 - name: Build
 id: build
 run: |

View File

@@ -19,7 +19,7 @@ jobs:
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -29,8 +29,8 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test transformers
 run: |
 make --jobs=5 --output-sync=target -C backend/python/transformers
@@ -41,7 +41,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -51,8 +51,8 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test sentencetransformers
 run: |
 make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
@@ -64,7 +64,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -74,7 +74,7 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test rerankers
 run: |
@@ -86,7 +86,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -96,7 +96,7 @@
 sudo apt-get install -y libopencv-dev
 # Install UV
 curl -LsSf https://astral.sh/uv/install.sh | sh
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test diffusers
 run: |
 make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -107,7 +107,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -117,19 +117,19 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test parler-tts
 run: |
 make --jobs=5 --output-sync=target -C backend/python/parler-tts
 make --jobs=5 --output-sync=target -C backend/python/parler-tts test
 tests-openvoice:
 runs-on: ubuntu-latest
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -139,7 +139,7 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test openvoice
 run: |
@@ -151,7 +151,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -161,7 +161,7 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test transformers-musicgen
 run: |
@@ -175,7 +175,7 @@
 # steps:
 # - name: Clone
 # uses: actions/checkout@v4
 # with:
 # submodules: true
 # - name: Dependencies
 # run: |
@@ -185,14 +185,14 @@
 # curl -LsSf https://astral.sh/uv/install.sh | sh
 # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 # sudo apt-get install -y libopencv-dev
-# pip install --user grpcio-tools==1.64.0
+# pip install --user --no-cache-dir grpcio-tools==1.64.1
 # - name: Test petals
 # run: |
 # make --jobs=5 --output-sync=target -C backend/python/petals
 # make --jobs=5 --output-sync=target -C backend/python/petals test
 # tests-bark:
 # runs-on: ubuntu-latest
@@ -239,7 +239,7 @@
 # df -h
 # - name: Clone
 # uses: actions/checkout@v4
 # with:
 # submodules: true
 # - name: Dependencies
 # run: |
@@ -249,14 +249,14 @@
 # curl -LsSf https://astral.sh/uv/install.sh | sh
 # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 # sudo apt-get install -y libopencv-dev
-# pip install --user grpcio-tools==1.64.0
+# pip install --user --no-cache-dir grpcio-tools==1.64.1
 # - name: Test bark
 # run: |
 # make --jobs=5 --output-sync=target -C backend/python/bark
 # make --jobs=5 --output-sync=target -C backend/python/bark test
 # Below tests needs GPU. Commented out for now
 # TODO: Re-enable as soon as we have GPU nodes
 # tests-vllm:
@@ -264,7 +264,7 @@
 # steps:
 # - name: Clone
 # uses: actions/checkout@v4
 # with:
 # submodules: true
 # - name: Dependencies
 # run: |
@@ -274,7 +274,7 @@
 # curl -LsSf https://astral.sh/uv/install.sh | sh
 # sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 # sudo apt-get install -y libopencv-dev
-# pip install --user grpcio-tools==1.64.0
+# pip install --user --no-cache-dir grpcio-tools==1.64.1
 # - name: Test vllm
 # run: |
 # make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -284,7 +284,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -294,7 +294,7 @@
 curl -LsSf https://astral.sh/uv/install.sh | sh
 sudo apt-get install -y ca-certificates cmake curl patch python3-pip
 sudo apt-get install -y libopencv-dev
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test vall-e-x
 run: |
 make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -305,7 +305,7 @@
 steps:
 - name: Clone
 uses: actions/checkout@v4
 with:
 submodules: true
 - name: Dependencies
 run: |
@@ -314,8 +314,8 @@
 sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
 # Install UV
 curl -LsSf https://astral.sh/uv/install.sh | sh
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test coqui
 run: |
 make --jobs=5 --output-sync=target -C backend/python/coqui
 make --jobs=5 --output-sync=target -C backend/python/coqui test

View File

@@ -10,7 +10,7 @@ on:
 - '*'
 env:
-GRPC_VERSION: v1.64.0
+GRPC_VERSION: v1.65.0
 concurrency:
 group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -94,8 +94,8 @@ jobs:
 sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
 export CUDACXX=/usr/local/cuda/bin/nvcc
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 # The python3-grpc-tools package in 22.04 is too old
 pip install --user grpcio-tools
@@ -110,7 +110,7 @@ jobs:
 # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
 PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 env:
-CUDA_VERSION: 12-3
+CUDA_VERSION: 12-4
 - name: Cache grpc
 id: cache-grpc
 uses: actions/cache@v4
@@ -215,7 +215,7 @@ jobs:
 - name: Dependencies
 run: |
 brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-pip install --user grpcio-tools==1.64.0
+pip install --user --no-cache-dir grpcio-tools==1.64.1
 - name: Test
 run: |
 export C_INCLUDE_PATH=/usr/local/include

View File

@@ -13,11 +13,17 @@ jobs:
 - uses: actions/setup-go@v5
 with:
 go-version: 'stable'
+- name: Dependencies
+run: |
+sudo apt-get update
+sudo apt-get install protobuf-compiler
 - run: |
 go install github.com/swaggo/swag/cmd/swag@latest
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
 - name: Bump swagger 🔧
 run: |
-make swagger
+make protogen-go swagger
 - name: Create Pull Request
 uses: peter-evans/create-pull-request@v6
 with:

View File

@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core
 USER root
-ARG GO_VERSION=1.22.4
+ARG GO_VERSION=1.22.5
 ARG TARGETARCH
 ARG TARGETVARIANT
@@ -108,11 +108,11 @@ RUN <<EOT bash
 if [ "${BUILD_TYPE}" = "vulkan" ]; then
 apt-get update && \
 apt-get install -y --no-install-recommends \
 software-properties-common pciutils wget gpg-agent && \
 wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
 wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
 apt-get update && \
 apt-get install -y \
 vulkan-sdk && \
 apt-get clean && \
 rm -rf /var/lib/apt/lists/*
@@ -124,33 +124,13 @@ RUN <<EOT bash
 if [ "${BUILD_TYPE}" = "cublas" ]; then
 apt-get update && \
 apt-get install -y --no-install-recommends \
 software-properties-common pciutils
 if [ "amd64" = "$TARGETARCH" ]; then
 curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
 fi
 if [ "arm64" = "$TARGETARCH" ]; then
 curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
 fi
-dpkg -i cuda-keyring_1.1-1_all.deb && \
-rm -f cuda-keyring_1.1-1_all.deb && \
-apt-get update && \
-apt-get install -y --no-install-recommends \
-cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-apt-get clean && \
-rm -rf /var/lib/apt/lists/*
-fi
-EOT
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-apt-get update && \
-apt-get install -y --no-install-recommends \
-software-properties-common pciutils && \
-curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
 dpkg -i cuda-keyring_1.1-1_all.deb && \
 rm -f cuda-keyring_1.1-1_all.deb && \
 apt-get update && \
@@ -162,8 +142,9 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
 libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
 libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
 apt-get clean && \
-rm -rf /var/lib/apt/lists/* \
-; fi
+rm -rf /var/lib/apt/lists/*
+fi
+EOT
 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -206,7 +187,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.64.2
+ARG GRPC_VERSION=v1.65.0
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=368645698ab648e390dcd7c00a2bf60efa654f57
+CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=d207c6882247984689091ae9d780d2e51eab1df7
+WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -214,7 +214,7 @@ sources/go-bert.cpp:
 git remote add origin $(BERT_REPO) && \
 git fetch origin && \
 git checkout $(BERT_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
 $(MAKE) -C sources/go-bert.cpp libgobert.a
@@ -227,7 +227,7 @@ sources/go-llama.cpp:
 git remote add origin $(GOLLAMA_REPO) && \
 git fetch origin && \
 git checkout $(GOLLAMA_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
 $(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
@@ -240,7 +240,7 @@ sources/go-piper:
 git remote add origin $(PIPER_REPO) && \
 git fetch origin && \
 git checkout $(PIPER_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-piper/libpiper_binding.a: sources/go-piper
 $(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
@@ -253,7 +253,7 @@ sources/gpt4all:
 git remote add origin $(GPT4ALL_REPO) && \
 git fetch origin && \
 git checkout $(GPT4ALL_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 $(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
@@ -266,7 +266,7 @@ sources/go-rwkv.cpp:
 git remote add origin $(RWKV_REPO) && \
 git fetch origin && \
 git checkout $(RWKV_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
 cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
@@ -279,7 +279,7 @@ sources/go-stable-diffusion:
 git remote add origin $(STABLEDIFFUSION_REPO) && \
 git fetch origin && \
 git checkout $(STABLEDIFFUSION_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
@@ -292,7 +292,7 @@ sources/go-tiny-dream:
 git remote add origin $(TINYDREAM_REPO) && \
 git fetch origin && \
 git checkout $(TINYDREAM_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
 $(MAKE) -C sources/go-tiny-dream libtinydream.a
@@ -305,12 +305,12 @@ sources/whisper.cpp:
 git remote add origin $(WHISPER_REPO) && \
 git fetch origin && \
 git checkout $(WHISPER_CPP_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 replace:
 $(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@@ -384,7 +384,7 @@ endif
 CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 build-minimal:
-BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
+BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
 build-api:
 BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -767,28 +767,28 @@ else
 endif
 # This target is for manually building a variant with-auto detected flags
-backend-assets/grpc/llama-cpp: backend-assets/grpc
+backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-cpp
 $(MAKE) -C backend/cpp/llama-cpp purge
 $(info ${GREEN}I llama-cpp build info:avx2${RESET})
 $(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
-backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
+backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-avx2
 $(MAKE) -C backend/cpp/llama-avx2 purge
 $(info ${GREEN}I llama-cpp build info:avx2${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
-backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
+backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-avx
 $(MAKE) -C backend/cpp/llama-avx purge
 $(info ${GREEN}I llama-cpp build info:avx${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
-backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-fallback
 $(MAKE) -C backend/cpp/llama-fallback purge
 $(info ${GREEN}I llama-cpp build info:fallback${RESET})
@@ -799,35 +799,35 @@ ifeq ($(BUILD_TYPE),metal)
 cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
-backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
+backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-cuda
 $(MAKE) -C backend/cpp/llama-cuda purge
 $(info ${GREEN}I llama-cpp build info:cuda${RESET})
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
-backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
+backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-hipblas
 $(MAKE) -C backend/cpp/llama-hipblas purge
 $(info ${GREEN}I llama-cpp build info:hipblas${RESET})
 BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
-backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
+backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
 $(MAKE) -C backend/cpp/llama-sycl_f16 purge
 $(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
 BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
-backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
+backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
 $(MAKE) -C backend/cpp/llama-sycl_f32 purge
 $(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
 BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
 cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
-backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
+backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
 cp -rf backend/cpp/llama backend/cpp/llama-grpc
 $(MAKE) -C backend/cpp/llama-grpc purge
 $(info ${GREEN}I llama-cpp build info:grpc${RESET})
@@ -905,7 +905,7 @@ docker-aio-all:
 docker-image-intel:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -913,7 +913,7 @@ docker-image-intel:
 docker-image-intel-xpu:
 docker build \
---build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
+--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
 --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 --build-arg GO_TAGS="none" \
 --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \

View File

@@ -72,14 +72,15 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
 [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
-- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
-- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
-- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
-- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
-- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
-- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
-- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
-- Reranker API: https://github.com/mudler/LocalAI/pull/2121
+- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
+- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
+- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
+- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
+- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
+- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
+- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
+- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
+- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
 Hot topics (looking for contributors):
@@ -89,6 +90,7 @@ Hot topics (looking for contributors):
 - Assistant API: https://github.com/mudler/LocalAI/issues/1273
 - Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
 - Vulkan: https://github.com/mudler/LocalAI/issues/1647
+- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
 If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
@@ -134,6 +136,7 @@ Other:
 - Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
 - Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
 - Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
+- Github Actions: https://github.com/marketplace/actions/start-localai
 - Examples: https://github.com/mudler/LocalAI/tree/master/examples/

View File

@@ -52,7 +52,7 @@ $(GRPC_REPO):
 git remote add origin $(GIT_REPO_LIB_GRPC) && \
 git fetch origin && \
 git checkout $(TAG_LIB_GRPC) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 $(GRPC_BUILD): $(GRPC_REPO)
 mkdir -p $(GRPC_BUILD)

View File

@@ -52,7 +52,7 @@ llama.cpp:
 git remote add origin $(LLAMA_REPO) && \
 git fetch origin && \
 git checkout -b build $(LLAMA_VERSION) && \
-git submodule update --init --recursive --depth 1
+git submodule update --init --recursive --depth 1 --single-branch
 llama.cpp/examples/grpc-server: llama.cpp
 mkdir -p llama.cpp/examples/grpc-server

View File

@ -2108,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["grammar"] = predict->grammar(); data["grammar"] = predict->grammar();
data["prompt"] = predict->prompt(); data["prompt"] = predict->prompt();
data["ignore_eos"] = predict->ignoreeos(); data["ignore_eos"] = predict->ignoreeos();
data["embeddings"] = predict->embeddings();
// for each image in the request, add the image data // for each image in the request, add the image data
// //
@ -2385,6 +2386,31 @@ public:
return grpc::Status::OK; return grpc::Status::OK;
} }
/// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
json data = parse_options(false, request, llama);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
//std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
llama.queue_results.remove_waiting_task_id(task_id);
if (!result.error && result.stop) {
std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
// loop the vector and set the embeddings results
for (int i = 0; i < embeddings.size(); i++) {
embeddingResult->add_embeddings(embeddings[i]);
}
}
else
{
return grpc::Status::OK;
}
return grpc::Status::OK;
}
}; };
void RunServer(const std::string& server_address) { void RunServer(const std::string& server_address) {
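Editor's note: for readers wiring this up from the Go side, below is a minimal client sketch for the new Embedding RPC. It assumes the generated stubs in github.com/mudler/LocalAI/pkg/grpc/proto expose a Backend service with an Embedding method taking *pb.PredictOptions and returning *pb.EmbeddingResult, and that PredictOptions carries an Embeddings string mirroring the predict->embeddings() accessor used above; the constructor and field names are assumptions, not verified against the generated code.

    package main

    import (
    	"context"
    	"fmt"
    	"log"

    	pb "github.com/mudler/LocalAI/pkg/grpc/proto" // import path as used elsewhere in this commit
    	"google.golang.org/grpc"
    	"google.golang.org/grpc/credentials/insecure"
    )

    func main() {
    	// Dial the llama.cpp gRPC backend started by RunServer (address is an example value).
    	conn, err := grpc.Dial("127.0.0.1:50051", grpc.WithTransportCredentials(insecure.NewCredentials()))
    	if err != nil {
    		log.Fatalf("dial: %v", err)
    	}
    	defer conn.Close()

    	client := pb.NewBackendClient(conn) // assumed constructor name for the Backend service

    	// The C++ handler reads predict->embeddings() and feeds it as the prompt of a
    	// completion task with n_predict = 0, so only the text to embed is required here.
    	res, err := client.Embedding(context.Background(), &pb.PredictOptions{
    		Embeddings: "the quick brown fox", // assumed field name mirroring embeddings()
    	})
    	if err != nil {
    		log.Fatalf("embedding: %v", err)
    	}
    	fmt.Printf("got %d floats\n", len(res.GetEmbeddings()))
    }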


@ -6,9 +6,9 @@ import (
"fmt" "fmt"
"path/filepath" "path/filepath"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/go-llama.cpp" "github.com/go-skynet/go-llama.cpp"
"github.com/mudler/LocalAI/pkg/grpc/base" "github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
) )
type LLM struct { type LLM struct {


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
auto-gptq==0.7.1 auto-gptq==0.7.1
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
torch torch
certifi certifi


@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
bark==0.1.5 bark==0.1.5
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
transformers transformers


@ -1,2 +1,2 @@
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf


@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
TTS==0.22.0 TTS==0.22.0
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
transformers transformers


@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from concurrent import futures from concurrent import futures
import traceback
import argparse import argparse
from collections import defaultdict from collections import defaultdict
from enum import Enum from enum import Enum
@ -17,35 +17,39 @@ import backend_pb2_grpc
import grpc import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image,export_to_video from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType from compel import Compel, ReturnedEmbeddingsType
from transformers import CLIPTextModel from transformers import CLIPTextModel
from safetensors.torch import load_file from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
COMPEL=os.environ.get("COMPEL", "0") == "1" COMPEL = os.environ.get("COMPEL", "0") == "1"
XPU=os.environ.get("XPU", "0") == "1" XPU = os.environ.get("XPU", "0") == "1"
CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1" CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1" SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8") CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
FPS=os.environ.get("FPS", "7") FPS = os.environ.get("FPS", "7")
DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1" DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
FRAMES=os.environ.get("FRAMES", "64") FRAMES = os.environ.get("FRAMES", "64")
if XPU: if XPU:
import intel_extension_for_pytorch as ipex import intel_extension_for_pytorch as ipex
print(ipex.xpu.get_device_name(0)) print(ipex.xpu.get_device_name(0))
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287 # https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
def sc(self, clip_input, images) : return images, [False for i in images] def sc(self, clip_input, images): return images, [False for i in images]
# edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values # edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
safety_checker.StableDiffusionSafetyChecker.forward = sc safety_checker.StableDiffusionSafetyChecker.forward = sc
@ -62,6 +66,8 @@ from diffusers.schedulers import (
PNDMScheduler, PNDMScheduler,
UniPCMultistepScheduler, UniPCMultistepScheduler,
) )
# The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
# Credits to https://github.com/neggles # Credits to https://github.com/neggles
# See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111 # See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@ -136,10 +142,12 @@ def get_scheduler(name: str, config: dict = {}):
return sched_class.from_config(config) return sched_class.from_config(config)
# Implement the BackendServicer class with the service methods # Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer): class BackendServicer(backend_pb2_grpc.BackendServicer):
def Health(self, request, context): def Health(self, request, context):
return backend_pb2.Reply(message=bytes("OK", 'utf-8')) return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context): def LoadModel(self, request, context):
try: try:
print(f"Loading model {request.Model}...", file=sys.stderr) print(f"Loading model {request.Model}...", file=sys.stderr)
@ -149,7 +157,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.F16Memory: if request.F16Memory:
torchType = torch.float16 torchType = torch.float16
variant="fp16" variant = "fp16"
local = False local = False
modelFile = request.Model modelFile = request.Model
@ -157,38 +165,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.cfg_scale = 7 self.cfg_scale = 7
if request.CFGScale != 0: if request.CFGScale != 0:
self.cfg_scale = request.CFGScale self.cfg_scale = request.CFGScale
clipmodel = "runwayml/stable-diffusion-v1-5" clipmodel = "runwayml/stable-diffusion-v1-5"
if request.CLIPModel != "": if request.CLIPModel != "":
clipmodel = request.CLIPModel clipmodel = request.CLIPModel
clipsubfolder = "text_encoder" clipsubfolder = "text_encoder"
if request.CLIPSubfolder != "": if request.CLIPSubfolder != "":
clipsubfolder = request.CLIPSubfolder clipsubfolder = request.CLIPSubfolder
# Check if ModelFile exists # Check if ModelFile exists
if request.ModelFile != "": if request.ModelFile != "":
if os.path.exists(request.ModelFile): if os.path.exists(request.ModelFile):
local = True local = True
modelFile = request.ModelFile modelFile = request.ModelFile
fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
self.img2vid=False self.img2vid = False
self.txt2vid=False self.txt2vid = False
## img2img ## img2img
if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""): if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile, self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionDepth2ImgPipeline": elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
## img2vid ## img2vid
elif request.PipelineType == "StableVideoDiffusionPipeline": elif request.PipelineType == "StableVideoDiffusionPipeline":
self.img2vid=True self.img2vid = True
self.pipe = StableVideoDiffusionPipeline.from_pretrained( self.pipe = StableVideoDiffusionPipeline.from_pretrained(
request.Model, torch_dtype=torchType, variant=variant request.Model, torch_dtype=torchType, variant=variant
) )
@ -197,64 +205,63 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
## text2img ## text2img
elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "": elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model, self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=SAFETENSORS, use_safetensors=SAFETENSORS,
variant=variant) variant=variant)
elif request.PipelineType == "StableDiffusionPipeline": elif request.PipelineType == "StableDiffusionPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionPipeline.from_single_file(modelFile, self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
torch_dtype=torchType) torch_dtype=torchType)
else: else:
self.pipe = StableDiffusionPipeline.from_pretrained(request.Model, self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "DiffusionPipeline": elif request.PipelineType == "DiffusionPipeline":
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "VideoDiffusionPipeline": elif request.PipelineType == "VideoDiffusionPipeline":
self.txt2vid=True self.txt2vid = True
self.pipe = DiffusionPipeline.from_pretrained(request.Model, self.pipe = DiffusionPipeline.from_pretrained(request.Model,
torch_dtype=torchType) torch_dtype=torchType)
elif request.PipelineType == "StableDiffusionXLPipeline": elif request.PipelineType == "StableDiffusionXLPipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile, self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True) use_safetensors=True)
else: else:
self.pipe = StableDiffusionXLPipeline.from_pretrained( self.pipe = StableDiffusionXLPipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True, use_safetensors=True,
variant=variant) variant=variant)
elif request.PipelineType == "StableDiffusion3Pipeline": elif request.PipelineType == "StableDiffusion3Pipeline":
if fromSingleFile: if fromSingleFile:
self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile, self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True) use_safetensors=True)
else: else:
self.pipe = StableDiffusion3Pipeline.from_pretrained( self.pipe = StableDiffusion3Pipeline.from_pretrained(
request.Model, request.Model,
torch_dtype=torchType, torch_dtype=torchType,
use_safetensors=True, use_safetensors=True,
variant=variant) variant=variant)
if CLIPSKIP and request.CLIPSkip != 0: if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip self.clip_skip = request.CLIPSkip
else: else:
self.clip_skip = 0 self.clip_skip = 0
# torch_dtype needs to be customized. float16 for GPU, float32 for CPU # torch_dtype needs to be customized. float16 for GPU, float32 for CPU
# TODO: this needs to be customized # TODO: this needs to be customized
if request.SchedulerType != "": if request.SchedulerType != "":
self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config) self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)
if COMPEL: if COMPEL:
self.compel = Compel( self.compel = Compel(
tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ], tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True] requires_pooled=[False, True]
) )
if request.ControlNet: if request.ControlNet:
self.controlnet = ControlNetModel.from_pretrained( self.controlnet = ControlNetModel.from_pretrained(
@ -263,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
self.pipe.controlnet = self.controlnet self.pipe.controlnet = self.controlnet
else: else:
self.controlnet = None self.controlnet = None
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
# Assume directory from request.ModelFile. # Assume directory from request.ModelFile.
# Only if request.LoraAdapter it's not an absolute path # Only if request.LoraAdapter it's not an absolute path
if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter: if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@ -282,10 +282,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
if request.LoraAdapter: if request.LoraAdapter:
# Check if its a local file and not a directory ( we load lora differently for a safetensor file ) # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter): if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
self.load_lora_weights(request.LoraAdapter, 1, device, torchType) # self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
self.pipe.load_lora_weights(request.LoraAdapter)
else: else:
self.pipe.unet.load_attn_procs(request.LoraAdapter) self.pipe.unet.load_attn_procs(request.LoraAdapter)
if request.CUDA:
self.pipe.to('cuda')
if self.controlnet:
self.controlnet.to('cuda')
if XPU:
self.pipe = self.pipe.to("xpu")
except Exception as err: except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
# Implement your logic here for the LoadModel service # Implement your logic here for the LoadModel service
@ -358,9 +365,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
# create a dictionary of values for the parameters # create a dictionary of values for the parameters
options = { options = {
"negative_prompt": request.negative_prompt, "negative_prompt": request.negative_prompt,
"width": request.width, "width": request.width,
"height": request.height, "height": request.height,
"num_inference_steps": steps, "num_inference_steps": steps,
} }
@ -372,7 +379,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
options["image"] = pose_image options["image"] = pose_image
if CLIPSKIP and self.clip_skip != 0: if CLIPSKIP and self.clip_skip != 0:
options["clip_skip"]=self.clip_skip options["clip_skip"] = self.clip_skip
# Get the keys that we will build the args for our pipe for # Get the keys that we will build the args for our pipe for
keys = options.keys() keys = options.keys()
@ -416,20 +423,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
image = self.pipe( image = self.pipe(
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
else: else:
# pass the kwargs dictionary to the self.pipe method # pass the kwargs dictionary to the self.pipe method
image = self.pipe( image = self.pipe(
prompt, prompt,
guidance_scale=self.cfg_scale, guidance_scale=self.cfg_scale,
**kwargs **kwargs
).images[0] ).images[0]
# save the result # save the result
image.save(request.dst) image.save(request.dst)
return backend_pb2.Result(message="Media generated", success=True) return backend_pb2.Result(message="Media generated", success=True)
def serve(address): def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@ -453,6 +461,7 @@ def serve(address):
except KeyboardInterrupt: except KeyboardInterrupt:
server.stop(0) server.stop(0)
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.") parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument( parser.add_argument(
@ -460,4 +469,4 @@ if __name__ == "__main__":
) )
args = parser.parse_args() args = parser.parse_args()
serve(args.addr) serve(args.addr)


@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchvision torchvision
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,7 +1,9 @@
setuptools
accelerate accelerate
compel compel
peft
diffusers diffusers
grpcio==1.64.0 grpcio==1.65.0
opencv-python opencv-python
pillow pillow
protobuf protobuf


@ -1,4 +1,4 @@
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
torch torch
transformers transformers


@ -1,5 +1,5 @@
accelerate accelerate
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
torch torch


@ -4,4 +4,4 @@
packaging packaging
setuptools setuptools
wheel wheel
torch==2.2.0 torch==2.3.1


@ -1,6 +1,6 @@
causal-conv1d==1.2.0.post2 causal-conv1d==1.4.0
mamba-ssm==1.2.0.post1 mamba-ssm==2.2.2
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
transformers transformers


@ -2,22 +2,22 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
grpcio==1.64.0 grpcio==1.64.1
protobuf protobuf
librosa==0.9.1 librosa==0.9.1
faster-whisper==0.9.0 faster-whisper==1.0.3
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy==1.26.4
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect==7.0.0
unidecode==1.3.7 unidecode==1.3.7
whisper-timestamped==1.14.2 whisper-timestamped==1.15.4
openai openai
python-dotenv python-dotenv
pypinyin==0.50.0 pypinyin==0.50.0
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio==3.48.0 gradio==4.38.1
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git


@ -1,20 +1,20 @@
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
librosa==0.9.1 librosa
faster-whisper==0.9.0 faster-whisper
pydub==0.25.1 pydub==0.25.1
wavmark==0.0.3 wavmark==0.0.3
numpy==1.22.0 numpy
eng_to_ipa==0.0.2 eng_to_ipa==0.0.2
inflect==7.0.0 inflect
unidecode==1.3.7 unidecode
whisper-timestamped==1.14.2 whisper-timestamped
openai openai
python-dotenv python-dotenv
pypinyin==0.50.0 pypinyin
cn2an==0.5.22 cn2an==0.5.22
jieba==0.42.1 jieba==0.42.1
gradio==3.48.0 gradio
langid==1.1.6 langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git git+https://github.com/myshell-ai/OpenVoice.git


@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,5 +1,5 @@
accelerate accelerate
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
torch torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
rerankers[transformers] rerankers[transformers]
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
transformers transformers


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
sentence-transformers==2.5.1 sentence-transformers==3.0.1
transformers transformers
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,7 +1,7 @@
accelerate accelerate
transformers transformers
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
torch torch
scipy==1.13.0 scipy==1.14.0
certifi certifi


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,9 +1,9 @@
accelerate accelerate
transformers transformers
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
torch torch
certifi certifi
intel-extension-for-transformers intel-extension-for-transformers
bitsandbytes bitsandbytes
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -3,4 +3,4 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,4 +1,4 @@
accelerate accelerate
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi


@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,6 @@
accelerate accelerate
vllm vllm
grpcio==1.64.0 grpcio==1.65.0
protobuf protobuf
certifi certifi
transformers transformers


@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
Type: c.ModelType, Type: c.ModelType,
RopeFreqScale: c.RopeFreqScale, RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA, NUMA: c.NUMA,
Embeddings: c.Embeddings, Embeddings: *c.Embeddings,
LowVRAM: *c.LowVRAM, LowVRAM: *c.LowVRAM,
NGPULayers: int32(*c.NGPULayers), NGPULayers: int32(*c.NGPULayers),
MMap: *c.MMap, MMap: *c.MMap,
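Editor's note: the switch to Embeddings: *c.Embeddings is only safe because SetDefaults (see the core/config hunk further down) guarantees the pointer is non-nil before this mapping runs. A defensive sketch of the same idea, using a hypothetical helper for code paths that might bypass SetDefaults, is below.

    package config

    // derefBool is a hypothetical helper: it returns the value of an optional
    // *bool config field, falling back to def when the field was never set.
    func derefBool(v *bool, def bool) bool {
    	if v == nil {
    		return def
    	}
    	return *v
    }

    // Illustrative usage when mapping the config onto the gRPC ModelOptions:
    //   Embeddings: derefBool(c.Embeddings, false),
    //   LowVRAM:    derefBool(c.LowVRAM, false),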


@ -2,20 +2,9 @@ package cli
import ( import (
"context" "context"
"errors"
"fmt"
"io"
"net"
"time"
"math/rand/v2"
cliContext "github.com/mudler/LocalAI/core/cli/context" cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
) )
type FederatedCLI struct { type FederatedCLI struct {
@ -25,106 +14,7 @@ type FederatedCLI struct {
func (f *FederatedCLI) Run(ctx *cliContext.Context) error { func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
n, err := p2p.NewNode(f.Peer2PeerToken) fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(context.Background())
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil { return fs.Start(context.Background())
return err
}
return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
}
func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)
ledger, _ := node.Ledger()
// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer
tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}
}
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
io.Copy(dst, src)
} }


@ -119,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
} }
log.Info().Msg("Starting P2P server discovery...") log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() { if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
var tunnelAddresses []string var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes("") { for _, v := range p2p.GetAvailableNodes("") {
if v.IsOnline() { if v.IsOnline() {
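Editor's note: the discovery callback gained parameters here: instead of a plain func(), it now receives the discovered service ID and node directly. A minimal sketch of the new signature, reusing NewNode, Start and the NodeData fields that appear in the removed federate.go code, is below (the token is a placeholder value).

    package main

    import (
    	"context"
    	"log"

    	"github.com/mudler/LocalAI/core/p2p"
    )

    func main() {
    	ctx := context.Background()
    	token := "<p2p-token>" // placeholder

    	n, err := p2p.NewNode(token)
    	if err != nil {
    		log.Fatal(err)
    	}
    	if err := n.Start(ctx); err != nil {
    		log.Fatal(err)
    	}

    	// New callback shape: the discovered service and node are passed in directly,
    	// so callers no longer need to rescan p2p.GetAvailableNodes themselves.
    	if err := p2p.ServiceDiscoverer(ctx, n, token, "", func(serviceID string, node p2p.NodeData) {
    		if node.IsOnline() {
    			log.Printf("service %s reachable via %s", serviceID, node.TunnelAddress)
    		}
    	}); err != nil {
    		log.Fatal(err)
    	}
    }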


@ -32,7 +32,7 @@ type BackendConfig struct {
Threads *int `yaml:"threads"` Threads *int `yaml:"threads"`
Debug *bool `yaml:"debug"` Debug *bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"` Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"` Embeddings *bool `yaml:"embeddings"`
Backend string `yaml:"backend"` Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"` TemplateConfig TemplateConfig `yaml:"template"`
@ -338,6 +338,10 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.LowVRAM = &falseV cfg.LowVRAM = &falseV
} }
if cfg.Embeddings == nil {
cfg.Embeddings = &falseV
}
// Value passed by the top level are treated as default (no implicit defaults) // Value passed by the top level are treated as default (no implicit defaults)
// defaults are set by the user // defaults are set by the user
if ctx == 0 { if ctx == 0 {
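Editor's note: turning Embeddings into a *bool lets the loader distinguish "not set in YAML" (nil, later defaulted to false in SetDefaults) from an explicit embeddings: false. A small self-contained sketch of that pattern is below; the struct and field names are illustrative, only the yaml tag and the nil-check mirror this hunk.

    package main

    import (
    	"fmt"

    	"gopkg.in/yaml.v2"
    )

    type modelConfig struct {
    	Embeddings *bool `yaml:"embeddings"`
    }

    func main() {
    	falseV := false

    	for _, doc := range []string{"{}", "embeddings: false", "embeddings: true"} {
    		var c modelConfig
    		if err := yaml.Unmarshal([]byte(doc), &c); err != nil {
    			panic(err)
    		}
    		// Mirror of SetDefaults: only fill in the default when the user said nothing.
    		if c.Embeddings == nil {
    			c.Embeddings = &falseV
    		}
    		fmt.Printf("%-20q -> embeddings=%v\n", doc, *c.Embeddings)
    	}
    }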


@ -20,6 +20,7 @@ const (
ChatML ChatML
Mistral03 Mistral03
Gemma Gemma
DeepSeek2
) )
type settingsConfig struct { type settingsConfig struct {
@ -37,6 +38,17 @@ var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConf
Completion: "{{.Input}}", Completion: "{{.Input}}",
}, },
}, },
DeepSeek2: {
StopWords: []string{"<｜end▁of▁sentence｜>"},
TemplateConfig: TemplateConfig{
ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
{{ end -}}
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<endofsentence>{{end}}
{{if eq .RoleName "system" -}}{{.Content}}
{{end -}}`,
Chat: "{{.Input -}}\nAssistant: ",
},
},
LLaMa3: { LLaMa3: {
StopWords: []string{"<|eot_id|>"}, StopWords: []string{"<|eot_id|>"},
TemplateConfig: TemplateConfig{ TemplateConfig: TemplateConfig{
@ -208,8 +220,11 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
qwen2 := arch == "qwen2" qwen2 := arch == "qwen2"
phi3 := arch == "phi-3" phi3 := arch == "phi-3"
gemma := strings.HasPrefix(f.Model().Name, "gemma") gemma := strings.HasPrefix(f.Model().Name, "gemma")
deepseek2 := arch == "deepseek2"
switch { switch {
case deepseek2:
return DeepSeek2
case gemma: case gemma:
return Gemma return Gemma
case llama3: case llama3:


@ -7,9 +7,10 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/imdario/mergo" "dario.cat/mergo"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
) )
@ -189,6 +190,12 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin
galleryFile := filepath.Join(basePath, galleryFileName(name)) galleryFile := filepath.Join(basePath, galleryFileName(name))
for _, f := range []string{configFile, galleryFile} {
if err := utils.VerifyPath(f, basePath); err != nil {
return fmt.Errorf("failed to verify path %s: %w", f, err)
}
}
var err error var err error
// Delete all the files associated to the model // Delete all the files associated to the model
// read the model config // read the model config
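Editor's note: DeleteModelFromSystem now refuses to touch files that escape the model path. The implementation of utils.VerifyPath is not part of this diff; the sketch below shows one plausible way such a guard can be written and is an assumption rather than the actual helper.

    package main

    import (
    	"fmt"
    	"path/filepath"
    	"strings"
    )

    // verifyPath reports an error when path, once cleaned, falls outside basePath
    // (e.g. via "../" segments). Illustrative only; not the real utils.VerifyPath.
    func verifyPath(path, basePath string) error {
    	base, err := filepath.Abs(basePath)
    	if err != nil {
    		return err
    	}
    	target, err := filepath.Abs(path)
    	if err != nil {
    		return err
    	}
    	if target != base && !strings.HasPrefix(target, base+string(filepath.Separator)) {
    		return fmt.Errorf("path %q escapes base directory %q", path, basePath)
    	}
    	return nil
    }

    func main() {
    	fmt.Println(verifyPath("/models/my-model.yaml", "/models"))  // <nil>
    	fmt.Println(verifyPath("/models/../etc/passwd", "/models"))  // error
    }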


@ -6,7 +6,7 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"github.com/imdario/mergo" "dario.cat/mergo"
lconfig "github.com/mudler/LocalAI/core/config" lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"


@ -12,6 +12,11 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
// @Summary Reranks a list of phrases by relevance to a given text query.
// @Param request body schema.JINARerankRequest true "query params"
// @Success 200 {object} schema.JINARerankResponse "Response"
// @Router /v1/rerank [post]
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
req := new(schema.JINARerankRequest) req := new(schema.JINARerankRequest)
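Editor's note: several handlers in this commit gain swag-style doc comments so they appear in the generated OpenAPI spec. The sketch below illustrates that convention on a toy Fiber handler; the route, types and summary are invented for the example, only the comment grammar (@Summary, @Param, @Success, @Router) mirrors what is being added here.

    package localai

    import "github.com/gofiber/fiber/v2"

    // EchoRequest is an illustrative request body.
    type EchoRequest struct {
    	Message string `json:"message"`
    }

    // EchoResponse is an illustrative response body.
    type EchoResponse struct {
    	Message string `json:"message"`
    }

    // EchoEndpoint echoes the request body back to the caller.
    // @Summary Echo a message.
    // @Param request body EchoRequest true "query params"
    // @Success 200 {object} EchoResponse "Response"
    // @Router /v1/echo [post]
    func EchoEndpoint() func(c *fiber.Ctx) error {
    	return func(c *fiber.Ctx) error {
    		req := new(EchoRequest)
    		if err := c.BodyParser(req); err != nil {
    			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
    		}
    		return c.JSON(EchoResponse{Message: req.Message})
    	}
    }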


@ -6,6 +6,11 @@ import (
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
) )
// BackendMonitorEndpoint returns the status of the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Success 200 {object} proto.StatusResponse "Response"
// @Router /backend/monitor [get]
func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
@ -23,6 +28,10 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
} }
} }
// BackendMonitorEndpoint shuts down the specified backend
// @Summary Backend monitor endpoint
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
// @Router /backend/shutdown [post]
func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error { func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
input := new(schema.BackendMonitorRequest) input := new(schema.BackendMonitorRequest)


@ -9,6 +9,7 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
@ -33,6 +34,10 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str
} }
} }
// GetOpStatusEndpoint returns the job status
// @Summary Returns the job status
// @Success 200 {object} gallery.GalleryOpStatus "Response"
// @Router /models/jobs/{uuid} [get]
func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
status := mgs.galleryApplier.GetStatus(c.Params("uuid")) status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
@ -43,12 +48,21 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx)
} }
} }
// GetAllStatusEndpoint returns all the jobs status progress
// @Summary Returns all the jobs status progress
// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response"
// @Router /models/jobs [get]
func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
return c.JSON(mgs.galleryApplier.GetAllStatus()) return c.JSON(mgs.galleryApplier.GetAllStatus())
} }
} }
// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
// @Summary Install models to LocalAI.
// @Param request body GalleryModel true "query params"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/apply [post]
func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
input := new(GalleryModel) input := new(GalleryModel)
@ -68,13 +82,15 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
Galleries: mgs.galleries, Galleries: mgs.galleries,
ConfigURL: input.ConfigURL, ConfigURL: input.ConfigURL,
} }
return c.JSON(struct { return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
} }
} }
// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
// @Summary delete models to LocalAI.
// @Param name path string true "Model name"
// @Success 200 {object} schema.GalleryResponse "Response"
// @Router /models/delete/{name} [post]
func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
modelName := c.Params("name") modelName := c.Params("name")
@ -89,13 +105,14 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return err return err
} }
return c.JSON(struct { return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
} }
} }
// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
// @Summary List installable models.
// @Success 200 {object} []gallery.GalleryModel "Response"
// @Router /models/available [get]
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries) log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
@ -116,6 +133,10 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
} }
} }
// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
// @Summary List all Galleries
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [get]
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents! // NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
@ -128,6 +149,11 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
} }
} }
// AddModelGalleryEndpoint adds a gallery in LocalAI
// @Summary Adds a gallery in LocalAI
// @Param request body config.Gallery true "Gallery details"
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [post]
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
input := new(config.Gallery) input := new(config.Gallery)
@ -150,6 +176,11 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
} }
} }
// RemoveModelGalleryEndpoint remove a gallery in LocalAI
// @Summary removes a gallery from LocalAI
// @Param request body config.Gallery true "Gallery details"
// @Success 200 {object} []config.Gallery "Response"
// @Router /models/galleries [delete]
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error { func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
input := new(config.Gallery) input := new(config.Gallery)
@ -165,6 +196,10 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool { mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
return gallery.Name == input.Name return gallery.Name == input.Name
}) })
return c.Send(nil) dat, err := json.Marshal(mgs.galleries)
if err != nil {
return err
}
return c.Send(dat)
} }
} }
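Editor's note: the anonymous response structs are replaced by a shared schema.GalleryResponse whose definition is not shown in this excerpt. Judging from the struct literals it replaces, it presumably looks like the sketch below (the json tags uuid and status are taken from the removed code; the real definition in core/schema may differ in detail).

    package schema

    // GalleryResponse is the job-tracking reply returned by the gallery endpoints.
    type GalleryResponse struct {
    	ID        string `json:"uuid"`
    	StatusURL string `json:"status"`
    }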


@ -9,8 +9,11 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
) )
// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
// @Summary Prometheus metrics endpoint
// @Param request body config.Gallery true "Gallery details"
// @Router /metrics [get]
func LocalAIMetricsEndpoint() fiber.Handler { func LocalAIMetricsEndpoint() fiber.Handler {
return adaptor.HTTPHandler(promhttp.Handler()) return adaptor.HTTPHandler(promhttp.Handler())
} }


@ -0,0 +1,28 @@
package localai
import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
)
// ShowP2PNodes returns the P2P Nodes
// @Summary Returns available P2P nodes
// @Success 200 {object} []schema.P2PNodesResponse "Response"
// @Router /api/p2p [get]
func ShowP2PNodes(c *fiber.Ctx) error {
// Render index
return c.JSON(schema.P2PNodesResponse{
Nodes: p2p.GetAvailableNodes(""),
FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
})
}
// ShowP2PToken returns the P2P token
// @Summary Show the P2P token
// @Success 200 {string} string "Response"
// @Router /api/p2p/token [get]
func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
}
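Editor's note: as a consumer-side illustration of the new endpoint, the sketch below fetches /api/p2p and decodes the two node lists into a local struct. The base URL and the JSON field names are assumptions loosely based on p2p.NodeData as it appears elsewhere in this diff, not a documented response shape.

    package main

    import (
    	"encoding/json"
    	"fmt"
    	"log"
    	"net/http"
    )

    // nodeInfo is a local, illustrative projection of p2p.NodeData.
    type nodeInfo struct {
    	ID            string `json:"ID"`
    	TunnelAddress string `json:"TunnelAddress"`
    }

    type p2pNodesResponse struct {
    	Nodes          []nodeInfo `json:"Nodes"`
    	FederatedNodes []nodeInfo `json:"FederatedNodes"`
    }

    func main() {
    	resp, err := http.Get("http://localhost:8080/api/p2p") // address is an example
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer resp.Body.Close()

    	var nodes p2pNodesResponse
    	if err := json.NewDecoder(resp.Body).Decode(&nodes); err != nil {
    		log.Fatal(err)
    	}
    	fmt.Printf("workers: %d, federated: %d\n", len(nodes.Nodes), len(nodes.FederatedNodes))
    }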


@ -11,6 +11,7 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/services"
model "github.com/mudler/LocalAI/pkg/model" model "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
@ -125,6 +126,14 @@ func generateRandomID() int64 {
return currentId return currentId
} }
// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistents https://platform.openai.com/docs/api-reference/assistants/listAssistants
// @Summary List available assistents
// @Param limit query int false "Limit the number of assistants returned"
// @Param order query string false "Order of assistants returned"
// @Param after query string false "Return assistants created after the given ID"
// @Param before query string false "Return assistants created before the given ID"
// @Success 200 {object} []Assistant "Response"
// @Router /v1/assistants [get]
func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
// Because we're altering the existing assistants list we should just duplicate it for now. // Because we're altering the existing assistants list we should just duplicate it for now.
@ -230,13 +239,11 @@ func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelNam
return return
} }
// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistents https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
// @Summary Delete assistents
// @Success 200 {object} schema.DeleteAssistantResponse "Response"
// @Router /v1/assistants/{assistant_id} [delete]
func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type DeleteAssistantResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id") assistantID := c.Params("assistant_id")
if assistantID == "" { if assistantID == "" {
@ -247,7 +254,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
if assistant.ID == assistantID { if assistant.ID == assistantID {
Assistants = append(Assistants[:i], Assistants[i+1:]...) Assistants = append(Assistants[:i], Assistants[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants) utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{ return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
ID: assistantID, ID: assistantID,
Object: "assistant.deleted", Object: "assistant.deleted",
Deleted: true, Deleted: true,
@ -256,7 +263,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
} }
log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID) log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{ return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
ID: assistantID, ID: assistantID,
Object: "assistant.deleted", Object: "assistant.deleted",
Deleted: false, Deleted: false,
@ -264,6 +271,10 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
} }
} }
// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistents https://platform.openai.com/docs/api-reference/assistants/getAssistant
// @Summary Get assistent data
// @Success 200 {object} Assistant "Response"
// @Router /v1/assistants/{assistant_id} [get]
func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
assistantID := c.Params("assistant_id") assistantID := c.Params("assistant_id")
@ -293,19 +304,9 @@ var (
AssistantsFileConfigFile = "assistantsFile.json" AssistantsFileConfigFile = "assistantsFile.json"
) )
type AssistantFileRequest struct {
FileID string `json:"file_id"`
}
type DeleteAssistantFileResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error {
request := new(AssistantFileRequest) request := new(schema.AssistantFileRequest)
if err := c.BodyParser(request); err != nil { if err := c.BodyParser(request); err != nil {
return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
} }
@ -346,7 +347,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
type ListAssistantFiles struct { type ListAssistantFiles struct {
Data []File Data []schema.File
Object string Object string
} }
@ -464,7 +465,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
// Remove the file from the assistantFiles slice // Remove the file from the assistantFiles slice
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{ return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
ID: fileId, ID: fileId,
Object: "assistant.file.deleted", Object: "assistant.file.deleted",
Deleted: true, Deleted: true,
@ -480,7 +481,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...) AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles) utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
ID: fileId, ID: fileId,
Object: "assistant.file.deleted", Object: "assistant.file.deleted",
Deleted: true, Deleted: true,
@ -491,7 +492,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
} }
log.Warn().Msgf("Unable to find assistant: %s", assistantID) log.Warn().Msgf("Unable to find assistant: %s", assistantID)
return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{ return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
ID: fileId, ID: fileId,
Object: "assistant.file.deleted", Object: "assistant.file.deleted",
Deleted: false, Deleted: false,


@ -14,6 +14,7 @@ import (
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/model"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -26,7 +27,7 @@ type MockLoader struct {
func tearDown() func() { func tearDown() func() {
return func() { return func() {
UploadedFiles = []File{} UploadedFiles = []schema.File{}
Assistants = []Assistant{} Assistants = []Assistant{}
AssistantFiles = []AssistantFile{} AssistantFiles = []AssistantFile{}
_ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile)) _ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
@ -294,7 +295,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig) file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -305,7 +306,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -316,7 +317,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
@@ -338,7 +339,7 @@ func TestAssistantEndpoints(t *testing.T) {
file, assistant, err := createFileAndAssistant(t, app, appConfig)
assert.NoError(t, err)
-afr := AssistantFileRequest{FileID: file.ID}
+afr := schema.AssistantFileRequest{FileID: file.ID}
af, _, err := createAssistantFile(app, afr, assistant.ID)
assert.NoError(t, err)
@@ -349,7 +350,7 @@ func TestAssistantEndpoints(t *testing.T) {
}
-func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
+func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
ar := &AssistantRequest{
Model: "ggml-gpt4all-j",
Name: "3.5-turbo",
@@ -362,7 +363,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
assistant, _, err := createAssistant(app, *ar)
if err != nil {
-return File{}, Assistant{}, err
+return schema.File{}, Assistant{}, err
}
t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
@@ -374,7 +375,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
return file, assistant, nil
}
-func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
+func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
afrJson, err := json.Marshal(afr)
if err != nil {
return AssistantFile{}, nil, err
@@ -451,7 +452,7 @@ func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId stri
resp, err := app.Test(request)
assert.NoError(t, err)
-var dafr DeleteAssistantFileResponse
+var dafr schema.DeleteAssistantFileResponse
err = json.NewDecoder(resp.Body).Decode(&dafr)
assert.NoError(t, err)
assert.True(t, dafr.Deleted)


@@ -225,18 +225,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
}
// Update input grammar
-// Handle if we should return "name" instead of "functions"
-if config.FunctionsConfig.FunctionName {
-jsStruct := funcs.ToJSONNameStructure()
-config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-} else {
-jsStruct := funcs.ToJSONFunctionStructure()
-config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-}
+jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
+config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
case input.JSONFunctionGrammarObject != nil:
config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-case input.JSONFunctionGrammarObjectName != nil:
-config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
default:
// Force picking one of the functions by the request
if config.FunctionToCall() != "" {


@@ -16,6 +16,11 @@ import (
"github.com/rs/zerolog/log"
)
+// EditEndpoint is the OpenAI edit API endpoint
+// @Summary OpenAI edit endpoint
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/edits [post]
func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
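The annotations above document `/v1/edits` as accepting a `schema.OpenAIRequest`. As a rough illustration only (host, port, and the model name are assumptions, not part of this commit), an OpenAI-style edit request against a local instance could look like:

```bash
# Hypothetical smoke test for the /v1/edits route documented above.
# Assumes LocalAI listens on localhost:8080 and a model aliased "gpt-4" is installed.
curl http://localhost:8080/v1/edits \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "instruction": "Rephrase this more formally",
    "input": "the server dont start"
  }'
```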


@@ -9,25 +9,16 @@ import (
"time"
"github.com/mudler/LocalAI/core/config"
+"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/pkg/utils"
)
-var UploadedFiles []File
+var UploadedFiles []schema.File
const UploadedFilesFile = "uploadedFiles.json"
-// File represents the structure of a file object from the OpenAI API.
-type File struct {
-ID string `json:"id"` // Unique identifier for the file
-Object string `json:"object"` // Type of the object (e.g., "file")
-Bytes int `json:"bytes"` // Size of the file in bytes
-CreatedAt time.Time `json:"created_at"` // The time at which the file was created
-Filename string `json:"filename"` // The name of the file
-Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
-}
// UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
@@ -61,7 +52,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
}
-f := File{
+f := schema.File{
ID: fmt.Sprintf("file-%d", getNextFileId()),
Object: "file",
Bytes: int(file.Size),
@@ -84,14 +75,13 @@ func getNextFileId() int64 {
}
// ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
+// @Summary List files.
+// @Success 200 {object} schema.ListFiles "Response"
+// @Router /v1/files [get]
func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-type ListFiles struct {
-Data []File
-Object string
-}
return func(c *fiber.Ctx) error {
-var listFiles ListFiles
+var listFiles schema.ListFiles
purpose := c.Query("purpose")
if purpose == "" {
@@ -108,7 +98,7 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
}
}
-func getFileFromRequest(c *fiber.Ctx) (*File, error) {
+func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
id := c.Params("file_id")
if id == "" {
return nil, fmt.Errorf("file_id parameter is required")
@@ -125,7 +115,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
// @Summary Returns information about a specific file.
-// @Success 200 {object} File "Response"
+// @Success 200 {object} schema.File "Response"
// @Router /v1/files/{file_id} [get]
func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
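Since the handlers above mirror the OpenAI files API, they can be exercised with curl. A minimal sketch, assuming a local instance on localhost:8080, a file named mydata.jsonl, and OpenAI-style multipart field names (the upload field names are not spelled out in this hunk):

```bash
# Hypothetical walkthrough of the files endpoints shown above.
# Upload a file (multipart form; field names assumed to follow the OpenAI convention).
curl http://localhost:8080/v1/files -F purpose="fine-tune" -F file="@mydata.jsonl"

# List uploaded files (ListFilesEndpoint, GET /v1/files).
curl http://localhost:8080/v1/files

# Fetch a single file's metadata (GetFilesEndpoint); IDs follow the "file-<n>" pattern used above.
curl http://localhost:8080/v1/files/file-1
```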


@@ -14,6 +14,7 @@ import (
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/config"
+"github.com/mudler/LocalAI/core/schema"
"github.com/gofiber/fiber/v2"
utils2 "github.com/mudler/LocalAI/pkg/utils"
@@ -22,11 +23,6 @@ import (
"testing"
)
-type ListFiles struct {
-Data []File
-Object string
-}
func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
// Preparing the mocked objects
loader = &config.BackendConfigLoader{}
@@ -159,7 +155,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
resp, _ := app.Test(req)
assert.Equal(t, 200, resp.StatusCode)
-var listFiles ListFiles
+var listFiles schema.ListFiles
if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
t.Errorf("Failed to decode response: %v", err)
return
@@ -201,7 +197,7 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
return app.Test(req)
}
-func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
+func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
// Create a file that exceeds the limit
testName := strings.Split(t.Name(), "/")[1]
file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
@@ -280,8 +276,8 @@ func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
return bodyBytes
}
-func responseToFile(t *testing.T, resp *http.Response) File {
-var file File
+func responseToFile(t *testing.T, resp *http.Response) schema.File {
+var file schema.File
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
@@ -292,8 +288,8 @@ func responseToFile(t *testing.T, resp *http.Response) File {
return file
}
-func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
-var listFiles ListFiles
+func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
+var listFiles schema.ListFiles
responseToString := bodyToString(resp, t)
err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)


@@ -59,16 +59,8 @@ func RegisterLocalAIRoutes(app *fiber.App,
// p2p
if p2p.IsP2PEnabled() {
-app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
-// Render index
-return c.JSON(map[string]interface{}{
-"Nodes": p2p.GetAvailableNodes(""),
-"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
-})
-})
-app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
-return c.Send([]byte(appConfig.P2PToken))
-})
+app.Get("/api/p2p", auth, localai.ShowP2PNodes)
+app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
}
app.Get("/version", auth, func(c *fiber.Ctx) error {
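With the handlers extracted into `localai.ShowP2PNodes` and `localai.ShowP2PToken`, the P2P status routes can be probed directly. A minimal sketch, assuming a P2P-enabled build listening on localhost:8080 and whatever auth the instance is configured with:

```bash
# Hypothetical check of the P2P routes registered above.
# Lists discovered nodes (see the schema.P2PNodesResponse type added later in this diff).
curl http://localhost:8080/api/p2p

# Prints the P2P token of the running instance.
curl http://localhost:8080/api/p2p/token
```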

core/p2p/federated.go (new file)

@@ -0,0 +1,15 @@
package p2p
const FederatedID = "federated"
type FederatedServer struct {
listenAddr, service, p2ptoken string
}
func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
return &FederatedServer{
listenAddr: listenAddr,
service: service,
p2ptoken: p2pToken,
}
}


@@ -0,0 +1,127 @@
//go:build p2p
// +build p2p
package p2p
import (
"context"
"errors"
"fmt"
"net"
"time"
"math/rand/v2"
"github.com/mudler/edgevpn/pkg/node"
"github.com/mudler/edgevpn/pkg/protocol"
"github.com/mudler/edgevpn/pkg/types"
"github.com/rs/zerolog/log"
)
func (f *FederatedServer) Start(ctx context.Context) error {
n, err := NewNode(f.p2ptoken)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
err = n.Start(ctx)
if err != nil {
return fmt.Errorf("creating a new node: %w", err)
}
if err := ServiceDiscoverer(ctx, n, f.p2ptoken, f.service, func(servicesID string, tunnel NodeData) {
log.Debug().Msgf("Discovered node: %s", tunnel.ID)
}); err != nil {
return err
}
return f.proxy(ctx, n)
}
func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
log.Info().Msgf("Allocating service '%s' on: %s", fs.service, fs.listenAddr)
// Open local port for listening
l, err := net.Listen("tcp", fs.listenAddr)
if err != nil {
log.Error().Err(err).Msg("Error listening")
return err
}
// ll.Info("Binding local port on", srcaddr)
ledger, _ := node.Ledger()
// Announce ourselves so nodes accepts our connection
ledger.Announce(
ctx,
10*time.Second,
func() {
// Retrieve current ID for ip in the blockchain
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
// If mismatch, update the blockchain
//if !found {
updatedMap := map[string]interface{}{}
updatedMap[node.Host().ID().String()] = &types.User{
PeerID: node.Host().ID().String(),
Timestamp: time.Now().String(),
}
ledger.Add(protocol.UsersLedgerKey, updatedMap)
// }
},
)
defer l.Close()
for {
select {
case <-ctx.Done():
return errors.New("context canceled")
default:
log.Debug().Msg("New for connection")
// Listen for an incoming connection.
conn, err := l.Accept()
if err != nil {
fmt.Println("Error accepting: ", err.Error())
continue
}
// Handle connections in a new goroutine, forwarding to the p2p service
go func() {
var tunnelAddresses []string
for _, v := range GetAvailableNodes(fs.service) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
if len(tunnelAddresses) == 0 {
log.Error().Msg("No available nodes yet")
return
}
// open a TCP stream to one of the tunnels
// chosen randomly
// TODO: optimize this and track usage
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
tunnelConn, err := net.Dial("tcp", tunnelAddr)
if err != nil {
log.Error().Err(err).Msg("Error connecting to tunnel")
return
}
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
closer := make(chan struct{}, 2)
go copyStream(closer, tunnelConn, conn)
go copyStream(closer, conn, tunnelConn)
<-closer
tunnelConn.Close()
conn.Close()
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
}()
}
}
}


@@ -6,7 +6,6 @@ import (
)
const defaultServicesID = "services_localai"
-const FederatedID = "federated"
type NodeData struct {
Name string


@@ -137,14 +137,9 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
}
-func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
-defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
-io.Copy(dst, src)
-}
// This is the main of the server (which keeps the env variable updated)
// This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
-func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
+func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func(serviceID string, node NodeData)) error {
if servicesID == "" {
servicesID = defaultServicesID
}
@@ -166,7 +161,7 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
case tunnel := <-tunnels:
AddNode(servicesID, tunnel)
if discoveryFunc != nil {
-discoveryFunc()
+discoveryFunc(servicesID, tunnel)
}
}
}
@@ -396,3 +391,8 @@ func newNodeOpts(token string) ([]node.Option, error) {
return nodeOpts, nil
}
+func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
+defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
+io.Copy(dst, src)
+}


@@ -14,7 +14,11 @@ func GenerateToken() string {
return "not implemented"
}
-func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
+func (f *FederatedServer) Start(ctx context.Context) error {
+return fmt.Errorf("not implemented")
+}
+func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData)) error {
return fmt.Errorf("not implemented")
}


@@ -1,6 +1,7 @@
package schema
import (
+"github.com/mudler/LocalAI/core/p2p"
gopsutil "github.com/shirou/gopsutil/v3/process"
)
@@ -14,6 +15,11 @@ type BackendMonitorResponse struct {
CPUPercent float64
}
+type GalleryResponse struct {
+ID string `json:"uuid"`
+StatusURL string `json:"status"`
+}
// @Description TTS request body
type TTSRequest struct {
Model string `json:"model" yaml:"model"` // model name or full path
@@ -59,3 +65,8 @@ type StoresFindResponse struct {
Values []string `json:"values" yaml:"values"`
Similarities []float32 `json:"similarities" yaml:"similarities"`
}
+type P2PNodesResponse struct {
+Nodes []p2p.NodeData `json:"nodes" yaml:"nodes"`
+FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
+}


@@ -2,6 +2,7 @@ package schema
import (
"context"
+"time"
functions "github.com/mudler/LocalAI/pkg/functions"
)
@@ -99,6 +100,37 @@ type OpenAIModel struct {
Object string `json:"object"`
}
type DeleteAssistantResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
// File represents the structure of a file object from the OpenAI API.
type File struct {
ID string `json:"id"` // Unique identifier for the file
Object string `json:"object"` // Type of the object (e.g., "file")
Bytes int `json:"bytes"` // Size of the file in bytes
CreatedAt time.Time `json:"created_at"` // The time at which the file was created
Filename string `json:"filename"` // The name of the file
Purpose string `json:"purpose"` // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
}
type ListFiles struct {
Data []File
Object string
}
type AssistantFileRequest struct {
FileID string `json:"file_id"`
}
type DeleteAssistantFileResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Deleted bool `json:"deleted"`
}
type ImageGenerationResponseFormat string
type ChatCompletionResponseFormatType string
@@ -147,8 +179,7 @@ type OpenAIRequest struct {
// A grammar to constrain the LLM output
Grammar string `json:"grammar" yaml:"grammar"`
-JSONFunctionGrammarObject *functions.JSONFunctionStructureFunction `json:"grammar_json_functions" yaml:"grammar_json_functions"`
-JSONFunctionGrammarObjectName *functions.JSONFunctionStructureName `json:"grammar_json_name" yaml:"grammar_json_name"`
+JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
Backend string `json:"backend" yaml:"backend"`


@@ -9,7 +9,7 @@ import (
"time"
"github.com/fsnotify/fsnotify"
-"github.com/imdario/mergo"
+"dario.cat/mergo"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)


@@ -112,6 +112,8 @@ name: "" # Model name, used to identify the model in API calls.
# Precision settings for the model, reducing precision can enhance performance on some hardware.
f16: null # Whether to use 16-bit floating-point precision.
+embeddings: true # Enable embeddings for the model.
# Concurrency settings for the application.
threads: null # Number of threads to use for processing.
@@ -150,7 +152,8 @@ function:
replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
capture_llm_results: [] # Capture language model results as text result, among JSON, in function calls. For instance, if a model returns a block for "thinking" and a block for "response", this will allow you to capture the thinking block.
-return_name_in_function_response: false # Some models might prefer to use "name" rather then "function" when returning JSON data. This will allow to use "name" as a key in the JSON response.
+function_name_key: "name"
+function_arguments_key: "arguments"
# Feature gating flags to enable experimental or optional features.
feature_flags: {}
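To make the two new keys concrete, a model configuration fragment overriding them could look like the sketch below; the model name and file are placeholders, not taken from this commit:

```bash
# Hypothetical model config using the function_name_key / function_arguments_key
# settings documented above.
cat > models/mymodel.yaml <<'EOF'
name: mymodel
parameters:
  model: mymodel.Q4_K_M.gguf
function:
  function_name_key: "name"
  function_arguments_key: "arguments"
EOF
```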


@@ -8,9 +8,9 @@ icon = "rocket_launch"
## Running other models
-> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
+> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_.
-To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
+To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.


@@ -16,6 +16,10 @@ Here are answers to some of the most common questions.
Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.
+### Benchmarking LocalAI and llama.cpp shows different results!
+LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780).
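As an illustration of the tweak described in this FAQ entry, a model configuration that turns mirostat off could look like the following sketch (file and model names are placeholders):

```bash
# Hypothetical model config disabling the mirostat sampling default mentioned above.
cat > models/my-benchmark-model.yaml <<'EOF'
name: my-benchmark-model
parameters:
  model: my-model.Q4_K_M.gguf
mirostat: 0
EOF
```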
### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.


@@ -55,8 +55,8 @@ apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-to
After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands
```bash
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
```


@@ -1,21 +1,69 @@
----
++++
disableToc = false
title = "Install and Run Models"
weight = 4
icon = "rocket_launch"
+++
-disableToc: false
-title: "Run models manually"
-weight: 5
-icon: "rocket_launch"
----
+To install models with LocalAI, you can:
+- Browse the Model Gallery from the Web Interface and install models with a couple of clicks. For more details, refer to the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}).
- Specify a model from the LocalAI gallery during startup, e.g., `local-ai run <model_gallery_name>`.
- Use a URI to specify a model file (e.g., `huggingface://...`, `oci://`, or `ollama://`) when starting LocalAI, e.g., `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`.
- Specify a URL to a model configuration file when starting LocalAI, e.g., `local-ai run https://gist.githubusercontent.com/.../phi-2.yaml`.
- Manually install the models by copying the files into the models directory (`--models`).
-# Run Models Manually
+## Run and Install Models via the Gallery
To run models available in the LocalAI gallery, you can use the WebUI or specify the model name when starting LocalAI. Models can be found in the gallery via the Web interface, the [model gallery](https://models.localai.io), or the CLI with: `local-ai models list`.
To install a model from the gallery, use the model name as the URI. For example, to run LocalAI with the Hermes model, execute:
```bash
local-ai run hermes-2-theta-llama-3-8b
```
To install only the model, use:
```bash
local-ai models install hermes-2-theta-llama-3-8b
```
Note: The galleries available in LocalAI can be customized to point to a different URL or a local directory. For more information on how to setup your own gallery, see the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}).
## Run Models via URI
To run models via URI, specify a URI to a model file or a configuration file when starting LocalAI. Valid syntax includes:
- `file://path/to/model`
- `huggingface://repository_id/model_file` (e.g., `huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`)
- From OCIs: `oci://container_image:tag`, `ollama://model_id:tag`
- From configuration files: `https://gist.githubusercontent.com/.../phi-2.yaml`
Configuration files can be used to customize the model defaults and settings. For advanced configurations, refer to the [Customize Models section]({{% relref "docs/getting-started/customize-model" %}}).
### Examples
```bash
# Start LocalAI with the phi-2 model
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
```
## Run Models Manually
Follow these steps to manually run models using LocalAI:
1. **Prepare Your Model and Configuration Files**:
-Ensure you have a model file and a configuration YAML file, if necessary. Customize model defaults and specific settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "docs/advanced" %}}).
+Ensure you have a model file and, if necessary, a configuration YAML file. Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "docs/advanced" %}}).
2. **GPU Acceleration**:
-For instructions on GPU acceleration, visit the [GPU acceleration]({{% relref "docs/features/gpu-acceleration" %}}) page.
+For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}) page.
3. **Run LocalAI**:
Choose one of the following methods to run LocalAI:
@@ -160,5 +208,3 @@ For instructions on building LocalAI from source, see the [Build Section]({{% re
{{< /tabs >}}
For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
----


@@ -38,13 +38,13 @@ For detailed instructions, see [Using container images]({{% relref "docs/getting
## Running LocalAI with All-in-One (AIO) Images
-> _Already have a model file? Skip to [Run models manually]({{% relref "docs/getting-started/manual" %}})_.
+> _Already have a model file? Skip to [Run models manually]({{% relref "docs/getting-started/models" %}})_.
LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the features of LocalAI. If pre-configured models are not required, you can use the standard [images]({{% relref "docs/getting-started/container-images" %}}).
These images are available for both CPU and GPU environments. AIO images are designed for ease of use and require no additional configuration.
-It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "docs/getting-started/manual" %}}).
+It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "docs/getting-started/models" %}}).
The AIO images come pre-configured with the following features:
- Text to Speech (TTS)
@@ -66,5 +66,5 @@ Explore additional resources and community contributions:
- [Run from Container images]({{% relref "docs/getting-started/container-images" %}})
- [Examples to try from the CLI]({{% relref "docs/getting-started/try-it-out" %}})
- [Build LocalAI and the container image]({{% relref "docs/getting-started/build" %}})
-- [Run models manually]({{% relref "docs/getting-started/manual" %}})
+- [Run models manually]({{% relref "docs/getting-started/models" %}})
- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)


@@ -17,10 +17,10 @@ After installation, install new models by navigating the model gallery, or by us
To install models with the WebUI, see the [Models section]({{%relref "docs/features/model-gallery" %}}).
With the CLI you can list the models with `local-ai models list` and install them with `local-ai models install <model-name>`.
-You can also [run models manually]({{%relref "docs/getting-started/manual" %}}) by copying files into the `models` directory.
+You can also [run models manually]({{%relref "docs/getting-started/models" %}}) by copying files into the `models` directory.
{{% /alert %}}
-You can test out the API endpoints using `curl`, few examples are listed below. The models we are refering here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed.
+You can test out the API endpoints using `curl`, few examples are listed below. The models we are referring here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed.
### Text Generation
@@ -193,4 +193,4 @@ Don't use the model file as `model` in the request unless you want to handle the
Use the model names like you would do with OpenAI like in the examples below. For instance `gpt-4-vision-preview`, or `gpt-4`.
{{% /alert %}}
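To make the curl-based testing mentioned in this page concrete, a minimal chat completion call could look like the sketch below; host, port, and the gpt-4 alias follow the AIO defaults described above, so adjust them to your setup:

```bash
# Hypothetical smoke test of the chat completions endpoint with the AIO default model alias.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "How are you doing?"}],
    "temperature": 0.1
  }'
```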

@@ -1 +1 @@
-Subproject commit c25bc2a27ab46649393ef7b310e14fff1311116d
+Subproject commit 1b2e139512106f8074ac7d4a884135d159720cc4


@@ -1,5 +1,5 @@
# Use an official Python runtime as a parent image
-FROM harbor.home.sfxworks.net/docker/library/python:3.9-slim
+FROM python:3.12-slim
# Set the working directory in the container
WORKDIR /app
@@ -7,8 +7,17 @@ WORKDIR /app
# Copy the current directory contents into the container at /app
COPY requirements.txt /app
+# Install c++ compiler
+RUN apt-get update \
+&& DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential \
+&& apt-get clean \
+&& rm -rf /var/lib/apt/lists/*
# Install any needed packages specified in requirements.txt
-RUN pip install -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt \
+&& DEBIAN_FRONTEND=noninteractive apt-get remove -y build-essential \
+&& apt-get clean \
+&& rm -rf /var/lib/apt/lists/*
COPY . /app


@@ -1,7 +1,6 @@
-llama_hub==0.0.41
-llama_index==0.8.55
-Requests==2.31.0
-weaviate_client==3.25.1
+llama_index==0.10.55
+requests==2.32.3
+weaviate_client==4.6.5
transformers
torch
chainlit


@@ -1,5 +1,5 @@
-FROM python:3.10-bullseye
+FROM python:3.12-slim-bullseye
COPY . /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
-ENTRYPOINT [ "python", "./functions-openai.py" ];
+ENTRYPOINT [ "python", "./functions-openai.py" ]


@@ -1,2 +1,2 @@
-langchain==0.1.0
-openai==0.27.8
+langchain==0.2.8
+openai==1.35.13


@@ -5,7 +5,7 @@ metadata:
spec:
containers:
- name: broken-pod
-image: nginx:1.a.b.c
+image: nginx:1.27.0
livenessProbe:
httpGet:
path: /


@@ -2,7 +2,7 @@ replicaCount: 1
deployment:
# https://quay.io/repository/go-skynet/local-ai?tab=tags
-image: quay.io/go-skynet/local-ai:v1.23.0
+image: quay.io/go-skynet/local-ai:v1.40.0
env:
threads: 4
debug: "true"
@@ -93,4 +93,4 @@ nodeSelector: {}
tolerations: []
affinity: {}


@@ -1,4 +1,4 @@
-langchain==0.1.0
-openai==0.27.6
-chromadb==0.3.21
-llama-index==0.9.36
+langchain==0.2.8
+openai==1.35.13
+chromadb==0.5.4
+llama-index==0.10.55


@@ -1,6 +1,6 @@
-FROM node:latest
+FROM node:lts-alpine
COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]


@@ -1,5 +1,5 @@
-FROM python:3.10-bullseye
+FROM python:3.12-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
-ENTRYPOINT [ "python", "./full_demo.py" ];
+ENTRYPOINT [ "python", "./full_demo.py" ]


@@ -1,32 +1,33 @@
-aiohttp==3.9.4
+aiohttp==3.9.5
aiosignal==1.3.1
-async-timeout==4.0.2
+async-timeout==4.0.3
-attrs==23.1.0
+attrs==23.2.0
-certifi==2023.7.22
+certifi==2024.7.4
-charset-normalizer==3.1.0
+charset-normalizer==3.3.2
colorama==0.4.6
-dataclasses-json==0.5.7
+dataclasses-json==0.6.7
-debugpy==1.6.7
+debugpy==1.8.2
-frozenlist==1.3.3
+frozenlist==1.4.1
-greenlet==2.0.2
+greenlet==3.0.3
idna==3.7
-langchain==0.1.0
+langchain==0.2.8
-marshmallow==3.19.0
+langchain-community==0.2.7
+marshmallow==3.21.3
marshmallow-enum==1.5.1
-multidict==6.0.4
+multidict==6.0.5
mypy-extensions==1.0.0
-numexpr==2.8.4
+numexpr==2.10.1
-numpy==1.24.3
+numpy==1.26.4
-openai==0.27.6
+openai==1.35.13
openapi-schema-pydantic==1.2.4
-packaging==23.1
+packaging>=23.2
-pydantic==1.10.13
+pydantic==2.8.2
-PyYAML==6.0
+PyYAML==6.0.1
-requests==2.31.0
+requests==2.32.3
-SQLAlchemy==2.0.12
+SQLAlchemy==2.0.30
-tenacity==8.2.2
+tenacity==8.5.0
-tqdm==4.66.3
+tqdm==4.66.4
-typing-inspect==0.8.0
+typing-inspect==0.9.0
-typing_extensions==4.5.0
+typing_extensions==4.12.2
-urllib3==1.26.18
+urllib3==2.2.2
-yarl==1.9.2
+yarl==1.9.4


@@ -1,10 +1,10 @@
module semantic-todo
-go 1.21.6
+go 1.22
require (
-github.com/gdamore/tcell/v2 v2.7.1
+github.com/gdamore/tcell/v2 v2.7.4
-github.com/rivo/tview v0.0.0-20240307173318-e804876934a1
+github.com/rivo/tview v0.0.0-20240524063012-037df494fb76
)
require (

Some files were not shown because too many files have changed in this diff.