Mirror of https://github.com/mudler/LocalAI.git (synced 2024-12-18 20:27:57 +00:00)

Commit c28e8ca697: Merge branch 'master' into ci/static-check
.github/dependabot.yml (vendored, 112 changes)

@@ -1,6 +1,10 @@
 # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 version: 2
 updates:
+  - package-ecosystem: "gitsubmodule"
+    directory: "/"
+    schedule:
+      interval: "weekly"
   - package-ecosystem: "gomod"
     directory: "/"
     schedule:
@@ -23,3 +27,111 @@ updates:
     schedule:
       # Check for updates to GitHub Actions every weekday
       interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/autogptq"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/bark"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/common/template"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/coqui"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/diffusers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/exllama"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/exllama2"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/mamba"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/openvoice"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/parler-tts"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/petals"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/rerankers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/sentencetransformers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/transformers"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/transformers-musicgen"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/vall-e-x"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/backend/python/vllm"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/chainlit"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/functions"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/langchain/langchainpy-localai-example"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/langchain-chroma"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "pip"
+    directory: "/examples/streamlit-bot"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/k8sgpt"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/kubernetes"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/langchain"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "gomod"
+    directory: "/examples/semantic-todo"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "docker"
+    directory: "/examples/telegram-bot"
+    schedule:
+      interval: "weekly"
.github/workflows/bump_deps.yaml (vendored, 5 changes)

@@ -27,9 +27,6 @@ jobs:
           - repository: "go-skynet/bloomz.cpp"
             variable: "BLOOMZ_VERSION"
             branch: "main"
-          - repository: "nomic-ai/gpt4all"
-            variable: "GPT4ALL_VERSION"
-            branch: "main"
           - repository: "mudler/go-ggllm.cpp"
             variable: "GOGGLLM_VERSION"
             branch: "master"
@@ -51,7 +48,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: ':arrow_up: Update ${{ matrix.repository }}'
+          title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
           branch: "update/${{ matrix.variable }}"
           body: Bump of ${{ matrix.repository }} version
           signoff: true
.github/workflows/bump_docs.yaml (vendored, 2 changes)

@@ -22,7 +22,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update docs version ${{ matrix.repository }}'
-          title: ':arrow_up: Update docs version ${{ matrix.repository }}'
+          title: 'docs: :arrow_up: update docs version ${{ matrix.repository }}'
           branch: "update/docs"
           body: Bump of ${{ matrix.repository }} version inside docs
           signoff: true
.github/workflows/checksum_checker.yaml (vendored, 4 changes)

@@ -20,12 +20,12 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y pip wget
           sudo pip install --upgrade pip
           pip install huggingface_hub
       - name: 'Setup yq'
         uses: dcarbone/install-yq-action@v1.1.1
         with:
-          version: 'v4.43.1'
+          version: 'v4.44.2'
           download-compressed: true
           force: true
 
.github/workflows/comment-pr.yaml (vendored, new file, 81 lines)

@@ -0,0 +1,81 @@
name: Comment PRs
on:
  pull_request_target:

jobs:
  comment-pr:
    env:
      MODEL_NAME: hermes-2-theta-llama-3-8b
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          ref: "${{ github.event.pull_request.merge_commit_sha }}"
      - uses: mudler/localai-github-action@v1
        with:
          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
      # Check the PR diff using the current branch and the base branch of the PR
      - uses: GrantBirki/git-diff-action@v2.7.0
        id: git-diff-action
        with:
          json_diff_file_output: diff.json
          raw_diff_file_output: diff.txt
          file_output_only: "true"
      - name: Show diff
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        run: |
          cat $DIFF
      - name: Summarize
        env:
          DIFF: ${{ steps.git-diff-action.outputs.raw-diff-path }}
        id: summarize
        run: |
          input="$(cat $DIFF)"

          # Define the LocalAI API endpoint
          API_URL="http://localhost:8080/chat/completions"

          # Create a JSON payload using jq to handle special characters
          json_payload=$(jq -n --arg input "$input" '{
            model: "'$MODEL_NAME'",
            messages: [
              {
                role: "system",
                content: "You are LocalAI-bot in Github that helps understanding PRs and assess complexity. Explain what has changed in this PR diff and why"
              },
              {
                role: "user",
                content: $input
              }
            ]
          }')

          # Send the request to LocalAI
          response=$(curl -s -X POST $API_URL \
            -H "Content-Type: application/json" \
            -d "$json_payload")

          # Extract the summary from the response
          summary="$(echo $response | jq -r '.choices[0].message.content')"

          # Print the summary
          # -H "Authorization: Bearer $API_KEY" \
          echo "Summary:"
          echo "$summary"
          echo "payload sent"
          echo "$json_payload"
          {
            echo 'message<<EOF'
            echo "$summary"
            echo EOF
          } >> "$GITHUB_OUTPUT"
          docker logs --tail 10 local-ai
      - uses: mshick/add-pr-comment@v2
        if: always()
        with:
          repo-token: ${{ secrets.UPDATE_BOT_TOKEN }}
          message: ${{ steps.summarize.outputs.message }}
          message-failure: |
            Uh oh! Could not analyze this PR, maybe it's too big?
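For reference, the Summarize step above boils down to a single OpenAI-style chat-completion request against the locally started instance. A minimal standalone sketch of that call (the system prompt is shortened here and the diff text is a placeholder; endpoint and model name are the ones used in the workflow):

  # Sketch only: assumes LocalAI is already listening on localhost:8080 with the model loaded.
  MODEL_NAME="hermes-2-theta-llama-3-8b"
  DIFF_TEXT="<PR diff goes here>"   # illustrative placeholder
  jq -n --arg input "$DIFF_TEXT" --arg model "$MODEL_NAME" '{
    model: $model,
    messages: [
      {role: "system", content: "Explain what has changed in this PR diff and why"},
      {role: "user",   content: $input}
    ]
  }' | curl -s -X POST http://localhost:8080/chat/completions \
         -H "Content-Type: application/json" -d @- \
     | jq -r '.choices[0].message.content'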
.github/workflows/dependabot_auto.yml (vendored, 2 changes)

@@ -14,7 +14,7 @@ jobs:
     steps:
       - name: Dependabot metadata
        id: metadata
-        uses: dependabot/fetch-metadata@v2.1.0
+        uses: dependabot/fetch-metadata@v2.2.0
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          skip-commit-verification: true
.github/workflows/generate_grpc_cache.yaml (vendored, 6 changes)

@@ -75,7 +75,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Cache GRPC
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           builder: ${{ steps.buildx.outputs.name }}
           # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -84,11 +84,11 @@ jobs:
           build-args: |
             GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
           context: .
           file: ./Dockerfile
           cache-to: type=gha,ignore-error=true
           cache-from: type=gha
           target: grpc
           platforms: ${{ matrix.platforms }}
           push: false
|
4
.github/workflows/generate_intel_image.yaml
vendored
4
.github/workflows/generate_intel_image.yaml
vendored
@ -15,7 +15,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
|
- base-image: intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04
|
||||||
runs-on: 'ubuntu-latest'
|
runs-on: 'ubuntu-latest'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
runs-on: ${{matrix.runs-on}}
|
runs-on: ${{matrix.runs-on}}
|
||||||
@ -46,7 +46,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Cache Intel images
|
- name: Cache Intel images
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
builder: ${{ steps.buildx.outputs.name }}
|
builder: ${{ steps.buildx.outputs.name }}
|
||||||
build-args: |
|
build-args: |
|
||||||
|
.github/workflows/image_build.yml (vendored, 12 changes)

@@ -215,7 +215,7 @@ jobs:
           password: ${{ secrets.quayPassword }}
 
       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         if: github.event_name != 'pull_request'
         with:
           builder: ${{ steps.buildx.outputs.name }}
@@ -232,7 +232,7 @@ jobs:
             BASE_IMAGE=${{ inputs.base-image }}
             GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
@@ -243,7 +243,7 @@ jobs:
           labels: ${{ steps.meta.outputs.labels }}
       ### Start testing image
       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         if: github.event_name == 'pull_request'
         with:
           builder: ${{ steps.buildx.outputs.name }}
@@ -260,7 +260,7 @@ jobs:
             BASE_IMAGE=${{ inputs.base-image }}
             GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.64.0
+            GRPC_VERSION=v1.65.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
@@ -276,7 +276,7 @@ jobs:
       ## End testing image
       - name: Build and push AIO image
         if: inputs.aio != ''
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
@@ -291,7 +291,7 @@ jobs:
 
       - name: Build and push AIO image (dockerhub)
         if: inputs.aio != ''
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
.github/workflows/notify-models.yaml (vendored, 10 changes)

@@ -14,12 +14,10 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # needed to checkout all branches for this Action to work
-      - name: Start LocalAI
-        run: |
-          echo "Starting LocalAI..."
-          docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-          until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+      - uses: mudler/localai-github-action@v1
+        with:
+          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
       # Check the PR diff using the current branch and the base branch of the PR
       - uses: GrantBirki/git-diff-action@v2.7.0
         id: git-diff-action
         with:
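The step removed here (and in notify-releases.yaml below) amounted to starting the container by hand and polling its health status; roughly the following, reproduced as a standalone sketch with the image tag and model name taken from the old step:

  # Sketch of what the replaced "Start LocalAI" step did.
  MODEL_NAME="hermes-2-theta-llama-3-8b"
  docker run -d --name local-ai -p 8080:8080 \
      localai/localai:master-ffmpeg-core run --debug "$MODEL_NAME"
  # Poll the container's health check until it reports healthy.
  until [ "$(docker inspect -f '{{.State.Health.Status}}' local-ai)" = "healthy" ]; do
      echo "Waiting for container to be ready"
      docker logs --tail 10 local-ai
      sleep 2
  done

The mudler/localai-github-action@v1 action wraps this start-and-wait logic, so both workflows now only declare the model they need.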
.github/workflows/notify-releases.yaml (vendored, 8 changes)

@@ -12,11 +12,9 @@ jobs:
       RELEASE_TITLE: ${{ github.event.release.name }}
       RELEASE_TAG_NAME: ${{ github.event.release.tag_name }}
     steps:
-      - name: Start LocalAI
-        run: |
-          echo "Starting LocalAI..."
-          docker run -e -ti -d --name local-ai -p 8080:8080 localai/localai:master-ffmpeg-core run --debug $MODEL_NAME
-          until [ "`docker inspect -f {{.State.Health.Status}} local-ai`" == "healthy" ]; do echo "Waiting for container to be ready"; docker logs --tail 10 local-ai; sleep 2; done
+      - uses: mudler/localai-github-action@v1
+        with:
+          model: 'hermes-2-theta-llama-3-8b' # Any from models.localai.io, or from huggingface.com with: "huggingface://<repository>/file"
       - name: Summarize
         id: summarize
         run: |
.github/workflows/prlint.yaml (vendored, 18 changes)

@@ -17,12 +17,12 @@ jobs:
       - uses: aslafy-z/conventional-pr-title-action@v3
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  check-pr-description:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: jadrol/pr-description-checker-action@v1.0.0
-        id: description-checker
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          exempt-labels: no qa
+  # check-pr-description:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - uses: actions/checkout@v2
+  #     - uses: jadrol/pr-description-checker-action@v1.0.0
+  #       id: description-checker
+  #       with:
+  #         repo-token: ${{ secrets.GITHUB_TOKEN }}
+  #         exempt-labels: no qa
|
20
.github/workflows/release.yaml
vendored
20
.github/workflows/release.yaml
vendored
@ -7,7 +7,7 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GRPC_VERSION: v1.64.0
|
GRPC_VERSION: v1.65.0
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: write
|
contents: write
|
||||||
@ -99,8 +99,8 @@ jobs:
|
|||||||
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
CROSS_TOOLCHAIN=/usr/$GNU_HOST
|
||||||
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
|
||||||
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
|
||||||
@ -163,7 +163,7 @@ jobs:
|
|||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-3
|
CUDA_VERSION: 12-5
|
||||||
- name: "Install Hipblas"
|
- name: "Install Hipblas"
|
||||||
env:
|
env:
|
||||||
ROCM_VERSION: "6.1"
|
ROCM_VERSION: "6.1"
|
||||||
@ -210,8 +210,8 @@ jobs:
|
|||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
run: |
|
run: |
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
export PATH=/usr/local/cuda/bin:$PATH
|
export PATH=/usr/local/cuda/bin:$PATH
|
||||||
export PATH=/opt/rocm/bin:$PATH
|
export PATH=/opt/rocm/bin:$PATH
|
||||||
@ -251,8 +251,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
|
sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
- name: Build stablediffusion
|
- name: Build stablediffusion
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PATH:$GOPATH/bin
|
export PATH=$PATH:$GOPATH/bin
|
||||||
@ -327,8 +327,8 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc
|
brew install protobuf grpc
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
- name: Build
|
- name: Build
|
||||||
id: build
|
id: build
|
||||||
run: |
|
run: |
|
||||||
|
60
.github/workflows/test-extra.yml
vendored
60
.github/workflows/test-extra.yml
vendored
@ -19,7 +19,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -29,8 +29,8 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test transformers
|
- name: Test transformers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/transformers
|
make --jobs=5 --output-sync=target -C backend/python/transformers
|
||||||
@ -41,7 +41,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -51,8 +51,8 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test sentencetransformers
|
- name: Test sentencetransformers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
|
||||||
@ -64,7 +64,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -74,7 +74,7 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test rerankers
|
- name: Test rerankers
|
||||||
run: |
|
run: |
|
||||||
@ -86,7 +86,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -96,7 +96,7 @@ jobs:
|
|||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
# Install UV
|
# Install UV
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test diffusers
|
- name: Test diffusers
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
make --jobs=5 --output-sync=target -C backend/python/diffusers
|
||||||
@ -107,7 +107,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -117,19 +117,19 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test parler-tts
|
- name: Test parler-tts
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts
|
||||||
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
make --jobs=5 --output-sync=target -C backend/python/parler-tts test
|
||||||
|
|
||||||
tests-openvoice:
|
tests-openvoice:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -139,7 +139,7 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test openvoice
|
- name: Test openvoice
|
||||||
run: |
|
run: |
|
||||||
@ -151,7 +151,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -161,7 +161,7 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
- name: Test transformers-musicgen
|
- name: Test transformers-musicgen
|
||||||
run: |
|
run: |
|
||||||
@ -175,7 +175,7 @@ jobs:
|
|||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
# run: |
|
# run: |
|
||||||
@ -185,14 +185,14 @@ jobs:
|
|||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
# pip install --user grpcio-tools==1.64.0
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
# - name: Test petals
|
# - name: Test petals
|
||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/petals
|
# make --jobs=5 --output-sync=target -C backend/python/petals
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
# make --jobs=5 --output-sync=target -C backend/python/petals test
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# tests-bark:
|
# tests-bark:
|
||||||
# runs-on: ubuntu-latest
|
# runs-on: ubuntu-latest
|
||||||
@ -239,7 +239,7 @@ jobs:
|
|||||||
# df -h
|
# df -h
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
# run: |
|
# run: |
|
||||||
@ -249,14 +249,14 @@ jobs:
|
|||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
# pip install --user grpcio-tools==1.64.0
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
|
|
||||||
# - name: Test bark
|
# - name: Test bark
|
||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/bark
|
# make --jobs=5 --output-sync=target -C backend/python/bark
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
# make --jobs=5 --output-sync=target -C backend/python/bark test
|
||||||
|
|
||||||
|
|
||||||
# Below tests needs GPU. Commented out for now
|
# Below tests needs GPU. Commented out for now
|
||||||
# TODO: Re-enable as soon as we have GPU nodes
|
# TODO: Re-enable as soon as we have GPU nodes
|
||||||
# tests-vllm:
|
# tests-vllm:
|
||||||
@ -264,7 +264,7 @@ jobs:
|
|||||||
# steps:
|
# steps:
|
||||||
# - name: Clone
|
# - name: Clone
|
||||||
# uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
# with:
|
# with:
|
||||||
# submodules: true
|
# submodules: true
|
||||||
# - name: Dependencies
|
# - name: Dependencies
|
||||||
# run: |
|
# run: |
|
||||||
@ -274,7 +274,7 @@ jobs:
|
|||||||
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
# curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
# sudo apt-get install -y libopencv-dev
|
# sudo apt-get install -y libopencv-dev
|
||||||
# pip install --user grpcio-tools==1.64.0
|
# pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
# - name: Test vllm
|
# - name: Test vllm
|
||||||
# run: |
|
# run: |
|
||||||
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
# make --jobs=5 --output-sync=target -C backend/python/vllm
|
||||||
@ -284,7 +284,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -294,7 +294,7 @@ jobs:
|
|||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch python3-pip
|
||||||
sudo apt-get install -y libopencv-dev
|
sudo apt-get install -y libopencv-dev
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test vall-e-x
|
- name: Test vall-e-x
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
make --jobs=5 --output-sync=target -C backend/python/vall-e-x
|
||||||
@ -305,7 +305,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -314,8 +314,8 @@ jobs:
|
|||||||
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
|
||||||
# Install UV
|
# Install UV
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test coqui
|
- name: Test coqui
|
||||||
run: |
|
run: |
|
||||||
make --jobs=5 --output-sync=target -C backend/python/coqui
|
make --jobs=5 --output-sync=target -C backend/python/coqui
|
||||||
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
make --jobs=5 --output-sync=target -C backend/python/coqui test
|
||||||
|
10
.github/workflows/test.yml
vendored
10
.github/workflows/test.yml
vendored
@ -10,7 +10,7 @@ on:
|
|||||||
- '*'
|
- '*'
|
||||||
|
|
||||||
env:
|
env:
|
||||||
GRPC_VERSION: v1.64.0
|
GRPC_VERSION: v1.65.0
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||||
@ -94,8 +94,8 @@ jobs:
|
|||||||
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
|
||||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
|
||||||
# The python3-grpc-tools package in 22.04 is too old
|
# The python3-grpc-tools package in 22.04 is too old
|
||||||
pip install --user grpcio-tools
|
pip install --user grpcio-tools
|
||||||
@ -110,7 +110,7 @@ jobs:
|
|||||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||||
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||||
env:
|
env:
|
||||||
CUDA_VERSION: 12-3
|
CUDA_VERSION: 12-4
|
||||||
- name: Cache grpc
|
- name: Cache grpc
|
||||||
id: cache-grpc
|
id: cache-grpc
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@ -215,7 +215,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
|
||||||
pip install --user grpcio-tools==1.64.0
|
pip install --user --no-cache-dir grpcio-tools==1.64.1
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
export C_INCLUDE_PATH=/usr/local/include
|
export C_INCLUDE_PATH=/usr/local/include
|
||||||
|
.github/workflows/update_swagger.yaml (vendored, 8 changes)

@@ -13,11 +13,17 @@ jobs:
       - uses: actions/setup-go@v5
         with:
           go-version: 'stable'
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install protobuf-compiler
       - run: |
           go install github.com/swaggo/swag/cmd/swag@latest
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
       - name: Bump swagger 🔧
         run: |
-          make swagger
+          make protogen-go swagger
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v6
         with:
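The extra installs exist so that protoc (from protobuf-compiler) can find the Go code generators when `make protogen-go` runs before the swagger bump. A generic sketch of what such a generation step looks like, with an assumed proto file path for illustration (the repository's actual Makefile target may differ):

  go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
  go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
  export PATH="$PATH:$(go env GOPATH)/bin"
  # Illustrative invocation; the workflow itself runs `make protogen-go swagger`.
  protoc --go_out=. --go_opt=paths=source_relative \
         --go-grpc_out=. --go-grpc_opt=paths=source_relative \
         backend/backend.proto   # assumed path, for illustration only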
Dockerfile (37 changes)

@@ -8,7 +8,7 @@ FROM ${BASE_IMAGE} AS requirements-core
 
 USER root
 
-ARG GO_VERSION=1.22.4
+ARG GO_VERSION=1.22.5
 ARG TARGETARCH
 ARG TARGETVARIANT
 
@@ -108,11 +108,11 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "vulkan" ]; then
        apt-get update && \
        apt-get install -y --no-install-recommends \
            software-properties-common pciutils wget gpg-agent && \
        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
        wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
        apt-get update && \
        apt-get install -y \
            vulkan-sdk && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*
@@ -124,33 +124,13 @@ RUN <<EOT bash
     if [ "${BUILD_TYPE}" = "cublas" ]; then
        apt-get update && \
        apt-get install -y --no-install-recommends \
            software-properties-common pciutils
        if [ "amd64" = "$TARGETARCH" ]; then
            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
        fi
        if [ "arm64" = "$TARGETARCH" ]; then
            curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
        fi
-       dpkg -i cuda-keyring_1.1-1_all.deb && \
-       rm -f cuda-keyring_1.1-1_all.deb && \
-       apt-get update && \
-       apt-get install -y --no-install-recommends \
-           cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-           libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-           libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-           libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-           libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
-           libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
-       apt-get clean && \
-       rm -rf /var/lib/apt/lists/*
-    fi
-EOT
-
-RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-       apt-get update && \
-       apt-get install -y --no-install-recommends \
-           software-properties-common pciutils && \
-       curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
        dpkg -i cuda-keyring_1.1-1_all.deb && \
        rm -f cuda-keyring_1.1-1_all.deb && \
        apt-get update && \
@@ -162,8 +142,9 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
            libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
            libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
        apt-get clean && \
-       rm -rf /var/lib/apt/lists/* \
-    ; fi
+       rm -rf /var/lib/apt/lists/*
+    fi
+EOT
 
 # If we are building with clblas support, we need the libraries for the builds
 RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
@@ -206,7 +187,7 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 
 # This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
-ARG GRPC_VERSION=v1.64.2
+ARG GRPC_VERSION=v1.65.0
 
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
 
Makefile (46 changes)

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=368645698ab648e390dcd7c00a2bf60efa654f57
+CPPLLAMA_VERSION?=b3283448ce9a5098226afe1d8648ccc578511fe4
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=d207c6882247984689091ae9d780d2e51eab1df7
+WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -214,7 +214,7 @@ sources/go-bert.cpp:
 	git remote add origin $(BERT_REPO) && \
 	git fetch origin && \
 	git checkout $(BERT_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
 	$(MAKE) -C sources/go-bert.cpp libgobert.a
@@ -227,7 +227,7 @@ sources/go-llama.cpp:
 	git remote add origin $(GOLLAMA_REPO) && \
 	git fetch origin && \
 	git checkout $(GOLLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-llama.cpp/libbinding.a: sources/go-llama.cpp
 	$(MAKE) -C sources/go-llama.cpp BUILD_TYPE=$(STABLE_BUILD_TYPE) libbinding.a
@@ -240,7 +240,7 @@ sources/go-piper:
 	git remote add origin $(PIPER_REPO) && \
 	git fetch origin && \
 	git checkout $(PIPER_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
@@ -253,7 +253,7 @@ sources/gpt4all:
 	git remote add origin $(GPT4ALL_REPO) && \
 	git fetch origin && \
 	git checkout $(GPT4ALL_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
 	$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
@@ -266,7 +266,7 @@ sources/go-rwkv.cpp:
 	git remote add origin $(RWKV_REPO) && \
 	git fetch origin && \
 	git checkout $(RWKV_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
 	cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..
@@ -279,7 +279,7 @@ sources/go-stable-diffusion:
 	git remote add origin $(STABLEDIFFUSION_REPO) && \
 	git fetch origin && \
 	git checkout $(STABLEDIFFUSION_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
 	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
@@ -292,7 +292,7 @@ sources/go-tiny-dream:
 	git remote add origin $(TINYDREAM_REPO) && \
 	git fetch origin && \
 	git checkout $(TINYDREAM_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/go-tiny-dream/libtinydream.a: sources/go-tiny-dream
 	$(MAKE) -C sources/go-tiny-dream libtinydream.a
@@ -305,12 +305,12 @@ sources/whisper.cpp:
 	git remote add origin $(WHISPER_REPO) && \
 	git fetch origin && \
 	git checkout $(WHISPER_CPP_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch
 
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream
+get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
|
|||||||
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
|
||||||
|
|
||||||
build-minimal:
|
build-minimal:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
|
||||||
|
|
||||||
build-api:
|
build-api:
|
||||||
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
|
||||||
@ -767,28 +767,28 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# This target is for manually building a variant with-auto detected flags
|
# This target is for manually building a variant with-auto detected flags
|
||||||
backend-assets/grpc/llama-cpp: backend-assets/grpc
|
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
cp -rf backend/cpp/llama backend/cpp/llama-cpp
|
||||||
$(MAKE) -C backend/cpp/llama-cpp purge
|
$(MAKE) -C backend/cpp/llama-cpp purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
cp -rf backend/cpp/llama backend/cpp/llama-avx2
|
||||||
$(MAKE) -C backend/cpp/llama-avx2 purge
|
$(MAKE) -C backend/cpp/llama-avx2 purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
cp -rf backend/cpp/llama backend/cpp/llama-avx
|
||||||
$(MAKE) -C backend/cpp/llama-avx purge
|
$(MAKE) -C backend/cpp/llama-avx purge
|
||||||
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
$(info ${GREEN}I llama-cpp build info:avx${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
cp -rf backend/cpp/llama backend/cpp/llama-fallback
|
||||||
$(MAKE) -C backend/cpp/llama-fallback purge
|
$(MAKE) -C backend/cpp/llama-fallback purge
|
||||||
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
|
||||||
@ -799,35 +799,35 @@ ifeq ($(BUILD_TYPE),metal)
|
|||||||
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
cp backend/cpp/llama-fallback/llama.cpp/build/bin/default.metallib backend-assets/grpc/
|
||||||
endif
|
endif
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
cp -rf backend/cpp/llama backend/cpp/llama-cuda
|
||||||
$(MAKE) -C backend/cpp/llama-cuda purge
|
$(MAKE) -C backend/cpp/llama-cuda purge
|
||||||
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
|
||||||
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
|
||||||
$(MAKE) -C backend/cpp/llama-hipblas purge
|
$(MAKE) -C backend/cpp/llama-hipblas purge
|
||||||
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
|
||||||
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
|
||||||
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
|
||||||
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
|
||||||
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
|
||||||
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
|
||||||
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
|
||||||
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
|
||||||
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
|
||||||
|
|
||||||
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
|
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
|
||||||
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
cp -rf backend/cpp/llama backend/cpp/llama-grpc
|
||||||
$(MAKE) -C backend/cpp/llama-grpc purge
|
$(MAKE) -C backend/cpp/llama-grpc purge
|
||||||
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
|
||||||
@ -905,7 +905,7 @@ docker-aio-all:
|
|||||||
|
|
||||||
docker-image-intel:
|
docker-image-intel:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
@ -913,7 +913,7 @@ docker-image-intel:
|
|||||||
|
|
||||||
docker-image-intel-xpu:
|
docker-image-intel-xpu:
|
||||||
docker build \
|
docker build \
|
||||||
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04 \
|
--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.2.0-devel-ubuntu22.04 \
|
||||||
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
|
||||||
--build-arg GO_TAGS="none" \
|
--build-arg GO_TAGS="none" \
|
||||||
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
|
||||||
|
19
README.md
19
README.md
@ -72,14 +72,15 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
- 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
- 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
- June 2024: 🆕 You can browse now the model gallery without LocalAI! Check out https://models.localai.io
|
||||||
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
- June 2024: Support for models from OCI registries: https://github.com/mudler/LocalAI/pull/2628
|
||||||
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
- May 2024: 🔥🔥 Decentralized P2P llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
|
||||||
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
- May 2024: 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
|
||||||
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
- May 2024: 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
|
||||||
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
- May 2024: 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
|
||||||
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||||
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Hot topics (looking for contributors):
|
||||||
|
|
||||||
@ -89,6 +90,7 @@ Hot topics (looking for contributors):
|
|||||||
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
- Assistant API: https://github.com/mudler/LocalAI/issues/1273
|
||||||
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
- Moderation endpoint: https://github.com/mudler/LocalAI/issues/999
|
||||||
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
- Vulkan: https://github.com/mudler/LocalAI/issues/1647
|
||||||
|
- Anthropic API: https://github.com/mudler/LocalAI/issues/1808
|
||||||
|
|
||||||
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22
|
||||||
|
|
||||||
@ -134,6 +136,7 @@ Other:
|
|||||||
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
|
||||||
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
- Shell-Pilot(Interact with LLM using LocalAI models via pure shell scripts on your Linux or MacOS system) https://github.com/reid41/shell-pilot
|
||||||
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
|
||||||
|
- Github Actions: https://github.com/marketplace/actions/start-localai
|
||||||
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/
|
||||||
|
|
||||||
|
|
||||||
|
@@ -52,7 +52,7 @@ $(GRPC_REPO):
 	git remote add origin $(GIT_REPO_LIB_GRPC) && \
 	git fetch origin && \
 	git checkout $(TAG_LIB_GRPC) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch

 $(GRPC_BUILD): $(GRPC_REPO)
 	mkdir -p $(GRPC_BUILD)
@@ -52,7 +52,7 @@ llama.cpp:
 	git remote add origin $(LLAMA_REPO) && \
 	git fetch origin && \
 	git checkout -b build $(LLAMA_VERSION) && \
-	git submodule update --init --recursive --depth 1
+	git submodule update --init --recursive --depth 1 --single-branch

 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
@@ -2108,6 +2108,7 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
     data["grammar"] = predict->grammar();
     data["prompt"] = predict->prompt();
     data["ignore_eos"] = predict->ignoreeos();
+    data["embeddings"] = predict->embeddings();

     // for each image in the request, add the image data
     //
@@ -2385,6 +2386,31 @@ public:

         return grpc::Status::OK;
     }

+    /// https://github.com/ggerganov/llama.cpp/blob/aa2341298924ac89778252015efcb792f2df1e20/examples/server/server.cpp#L2969
+    grpc::Status Embedding(ServerContext* context, const backend::PredictOptions* request, backend::EmbeddingResult* embeddingResult) {
+        json data = parse_options(false, request, llama);
+        const int task_id = llama.queue_tasks.get_new_id();
+        llama.queue_results.add_waiting_task_id(task_id);
+        llama.request_completion(task_id, { {"prompt", data["embeddings"]}, { "n_predict", 0}, {"image_data", ""} }, false, true, -1);
+        // get the result
+        task_result result = llama.queue_results.recv(task_id);
+        //std::cout << "Embedding result JSON" << result.result_json.dump() << std::endl;
+        llama.queue_results.remove_waiting_task_id(task_id);
+        if (!result.error && result.stop) {
+            std::vector<float> embeddings = result.result_json.value("embedding", std::vector<float>());
+            // loop the vector and set the embeddings results
+            for (int i = 0; i < embeddings.size(); i++) {
+                embeddingResult->add_embeddings(embeddings[i]);
+            }
+        }
+        else
+        {
+            return grpc::Status::OK;
+        }
+
+        return grpc::Status::OK;
+    }
 };

 void RunServer(const std::string& server_address) {
@@ -6,9 +6,9 @@ import (
 	"fmt"
 	"path/filepath"

-	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/go-llama.cpp"
 	"github.com/mudler/LocalAI/pkg/grpc/base"
+	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
 )

 type LLM struct {
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 certifi
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -1,2 +1,2 @@
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 from concurrent import futures
+import traceback
 import argparse
 from collections import defaultdict
 from enum import Enum
@@ -17,35 +17,39 @@ import backend_pb2_grpc

 import grpc

-from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
+from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
+    EulerAncestralDiscreteScheduler
 from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
 from diffusers.pipelines.stable_diffusion import safety_checker
-from diffusers.utils import load_image,export_to_video
+from diffusers.utils import load_image, export_to_video
 from compel import Compel, ReturnedEmbeddingsType

 from transformers import CLIPTextModel
 from safetensors.torch import load_file


 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
-COMPEL=os.environ.get("COMPEL", "0") == "1"
-XPU=os.environ.get("XPU", "0") == "1"
-CLIPSKIP=os.environ.get("CLIPSKIP", "1") == "1"
-SAFETENSORS=os.environ.get("SAFETENSORS", "1") == "1"
-CHUNK_SIZE=os.environ.get("CHUNK_SIZE", "8")
-FPS=os.environ.get("FPS", "7")
-DISABLE_CPU_OFFLOAD=os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
-FRAMES=os.environ.get("FRAMES", "64")
+COMPEL = os.environ.get("COMPEL", "0") == "1"
+XPU = os.environ.get("XPU", "0") == "1"
+CLIPSKIP = os.environ.get("CLIPSKIP", "1") == "1"
+SAFETENSORS = os.environ.get("SAFETENSORS", "1") == "1"
+CHUNK_SIZE = os.environ.get("CHUNK_SIZE", "8")
+FPS = os.environ.get("FPS", "7")
+DISABLE_CPU_OFFLOAD = os.environ.get("DISABLE_CPU_OFFLOAD", "0") == "1"
+FRAMES = os.environ.get("FRAMES", "64")

 if XPU:
     import intel_extension_for_pytorch as ipex

     print(ipex.xpu.get_device_name(0))

 # If MAX_WORKERS are specified in the environment use it, otherwise default to 1
 MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))


 # https://github.com/CompVis/stable-diffusion/issues/239#issuecomment-1627615287
-def sc(self, clip_input, images) : return images, [False for i in images]
+def sc(self, clip_input, images): return images, [False for i in images]

 # edit the StableDiffusionSafetyChecker class so that, when called, it just returns the images and an array of True values
 safety_checker.StableDiffusionSafetyChecker.forward = sc
@@ -62,6 +66,8 @@ from diffusers.schedulers import (
     PNDMScheduler,
     UniPCMultistepScheduler,
 )


 # The scheduler list mapping was taken from here: https://github.com/neggles/animatediff-cli/blob/6f336f5f4b5e38e85d7f06f1744ef42d0a45f2a7/src/animatediff/schedulers.py#L39
 # Credits to https://github.com/neggles
 # See https://github.com/huggingface/diffusers/issues/4167 for more details on sched mapping from A1111
@@ -136,10 +142,12 @@ def get_scheduler(name: str, config: dict = {}):

     return sched_class.from_config(config)


 # Implement the BackendServicer class with the service methods
 class BackendServicer(backend_pb2_grpc.BackendServicer):
     def Health(self, request, context):
         return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

     def LoadModel(self, request, context):
         try:
             print(f"Loading model {request.Model}...", file=sys.stderr)
@@ -149,7 +157,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

             if request.F16Memory:
                 torchType = torch.float16
-                variant="fp16"
+                variant = "fp16"

             local = False
             modelFile = request.Model
@@ -157,38 +165,38 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             self.cfg_scale = 7
             if request.CFGScale != 0:
                 self.cfg_scale = request.CFGScale

             clipmodel = "runwayml/stable-diffusion-v1-5"
             if request.CLIPModel != "":
                 clipmodel = request.CLIPModel
             clipsubfolder = "text_encoder"
             if request.CLIPSubfolder != "":
                 clipsubfolder = request.CLIPSubfolder

             # Check if ModelFile exists
             if request.ModelFile != "":
                 if os.path.exists(request.ModelFile):
                     local = True
                     modelFile = request.ModelFile

             fromSingleFile = request.Model.startswith("http") or request.Model.startswith("/") or local
-            self.img2vid=False
-            self.txt2vid=False
+            self.img2vid = False
+            self.txt2vid = False
             ## img2img
             if (request.PipelineType == "StableDiffusionImg2ImgPipeline") or (request.IMG2IMG and request.PipelineType == ""):
                 if fromSingleFile:
                     self.pipe = StableDiffusionImg2ImgPipeline.from_single_file(modelFile,
                                                                                  torch_dtype=torchType)
                 else:
                     self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(request.Model,
                                                                                 torch_dtype=torchType)

             elif request.PipelineType == "StableDiffusionDepth2ImgPipeline":
                 self.pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(request.Model,
                                                                              torch_dtype=torchType)
             ## img2vid
             elif request.PipelineType == "StableVideoDiffusionPipeline":
-                self.img2vid=True
+                self.img2vid = True
                 self.pipe = StableVideoDiffusionPipeline.from_pretrained(
                     request.Model, torch_dtype=torchType, variant=variant
                 )
@@ -197,64 +205,63 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             ## text2img
             elif request.PipelineType == "AutoPipelineForText2Image" or request.PipelineType == "":
                 self.pipe = AutoPipelineForText2Image.from_pretrained(request.Model,
                                                                       torch_dtype=torchType,
                                                                       use_safetensors=SAFETENSORS,
                                                                       variant=variant)
             elif request.PipelineType == "StableDiffusionPipeline":
                 if fromSingleFile:
                     self.pipe = StableDiffusionPipeline.from_single_file(modelFile,
                                                                          torch_dtype=torchType)
                 else:
                     self.pipe = StableDiffusionPipeline.from_pretrained(request.Model,
                                                                         torch_dtype=torchType)
             elif request.PipelineType == "DiffusionPipeline":
                 self.pipe = DiffusionPipeline.from_pretrained(request.Model,
                                                               torch_dtype=torchType)
             elif request.PipelineType == "VideoDiffusionPipeline":
-                self.txt2vid=True
+                self.txt2vid = True
                 self.pipe = DiffusionPipeline.from_pretrained(request.Model,
                                                               torch_dtype=torchType)
             elif request.PipelineType == "StableDiffusionXLPipeline":
                 if fromSingleFile:
                     self.pipe = StableDiffusionXLPipeline.from_single_file(modelFile,
                                                                            torch_dtype=torchType,
                                                                            use_safetensors=True)
                 else:
                     self.pipe = StableDiffusionXLPipeline.from_pretrained(
                         request.Model,
                         torch_dtype=torchType,
                         use_safetensors=True,
                         variant=variant)
             elif request.PipelineType == "StableDiffusion3Pipeline":
                 if fromSingleFile:
                     self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
                                                                           torch_dtype=torchType,
                                                                           use_safetensors=True)
                 else:
                     self.pipe = StableDiffusion3Pipeline.from_pretrained(
                         request.Model,
                         torch_dtype=torchType,
                         use_safetensors=True,
                         variant=variant)

             if CLIPSKIP and request.CLIPSkip != 0:
                 self.clip_skip = request.CLIPSkip
             else:
                 self.clip_skip = 0

             # torch_dtype needs to be customized. float16 for GPU, float32 for CPU
             # TODO: this needs to be customized
             if request.SchedulerType != "":
                 self.pipe.scheduler = get_scheduler(request.SchedulerType, self.pipe.scheduler.config)

             if COMPEL:
                 self.compel = Compel(
-                    tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2 ],
+                    tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
                     text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
                     returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
                     requires_pooled=[False, True]
                 )


             if request.ControlNet:
                 self.controlnet = ControlNetModel.from_pretrained(
@@ -263,13 +270,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 self.pipe.controlnet = self.controlnet
             else:
                 self.controlnet = None

-            if request.CUDA:
-                self.pipe.to('cuda')
-                if self.controlnet:
-                    self.controlnet.to('cuda')
-            if XPU:
-                self.pipe = self.pipe.to("xpu")
             # Assume directory from request.ModelFile.
             # Only if request.LoraAdapter it's not an absolute path
             if request.LoraAdapter and request.ModelFile != "" and not os.path.isabs(request.LoraAdapter) and request.LoraAdapter:
@@ -282,10 +282,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             if request.LoraAdapter:
                 # Check if its a local file and not a directory ( we load lora differently for a safetensor file )
                 if os.path.exists(request.LoraAdapter) and not os.path.isdir(request.LoraAdapter):
-                    self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
+                    # self.load_lora_weights(request.LoraAdapter, 1, device, torchType)
+                    self.pipe.load_lora_weights(request.LoraAdapter)
                 else:
                     self.pipe.unet.load_attn_procs(request.LoraAdapter)

+            if request.CUDA:
+                self.pipe.to('cuda')
+                if self.controlnet:
+                    self.controlnet.to('cuda')
+            if XPU:
+                self.pipe = self.pipe.to("xpu")
         except Exception as err:
             return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
         # Implement your logic here for the LoadModel service
@@ -358,9 +365,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

             # create a dictionary of values for the parameters
             options = {
                 "negative_prompt": request.negative_prompt,
                 "width": request.width,
                 "height": request.height,
                 "num_inference_steps": steps,
             }
@@ -372,7 +379,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 options["image"] = pose_image

             if CLIPSKIP and self.clip_skip != 0:
-                options["clip_skip"]=self.clip_skip
+                options["clip_skip"] = self.clip_skip

             # Get the keys that we will build the args for our pipe for
             keys = options.keys()
@@ -416,20 +423,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                 image = self.pipe(
                     guidance_scale=self.cfg_scale,
                     **kwargs
                 ).images[0]
             else:
                 # pass the kwargs dictionary to the self.pipe method
                 image = self.pipe(
                     prompt,
                     guidance_scale=self.cfg_scale,
                     **kwargs
                 ).images[0]

             # save the result
             image.save(request.dst)

             return backend_pb2.Result(message="Media generated", success=True)


 def serve(address):
     server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
     backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
@@ -453,6 +461,7 @@ def serve(address):
     except KeyboardInterrupt:
         server.stop(0)


 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run the gRPC server.")
     parser.add_argument(
@@ -460,4 +469,4 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()

     serve(args.addr)
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchvision
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,7 +1,9 @@
+setuptools
 accelerate
 compel
+peft
 diffusers
-grpcio==1.64.0
+grpcio==1.65.0
 opencv-python
 pillow
 protobuf
@@ -1,4 +1,4 @@
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 transformers
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 torch
@@ -4,4 +4,4 @@
 packaging
 setuptools
 wheel
-torch==2.2.0
+torch==2.3.1
@@ -1,6 +1,6 @@
-causal-conv1d==1.2.0.post2
-mamba-ssm==1.2.0.post1
-grpcio==1.64.0
+causal-conv1d==1.4.0
+mamba-ssm==2.2.2
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -2,22 +2,22 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.64.0
+grpcio==1.64.1
 protobuf
 librosa==0.9.1
-faster-whisper==0.9.0
+faster-whisper==1.0.3
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.22.0
+numpy==1.26.4
 eng_to_ipa==0.0.2
 inflect==7.0.0
 unidecode==1.3.7
-whisper-timestamped==1.14.2
+whisper-timestamped==1.15.4
 openai
 python-dotenv
 pypinyin==0.50.0
 cn2an==0.5.22
 jieba==0.42.1
-gradio==3.48.0
+gradio==4.38.1
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
@@ -1,20 +1,20 @@
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
-librosa==0.9.1
-faster-whisper==0.9.0
+librosa
+faster-whisper
 pydub==0.25.1
 wavmark==0.0.3
-numpy==1.22.0
+numpy
 eng_to_ipa==0.0.2
-inflect==7.0.0
-unidecode==1.3.7
-whisper-timestamped==1.14.2
+inflect
+unidecode
+whisper-timestamped
 openai
 python-dotenv
-pypinyin==0.50.0
+pypinyin
 cn2an==0.5.22
 jieba==0.42.1
-gradio==3.48.0
+gradio
 langid==1.1.6
 git+https://github.com/myshell-ai/MeloTTS.git
 git+https://github.com/myshell-ai/OpenVoice.git
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
-sentence-transformers==2.5.1
+sentence-transformers==3.0.1
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,7 +1,7 @@
 accelerate
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
-scipy==1.13.0
+scipy==1.14.0
 certifi
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,9 +1,9 @@
 accelerate
 transformers
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 torch
 certifi
 intel-extension-for-transformers
 bitsandbytes
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -3,4 +3,4 @@ intel-extension-for-pytorch
 torch
 torchaudio
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.64.0
+grpcio==1.65.0
 protobuf
 certifi
 transformers
@@ -91,7 +91,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
 		Type:          c.ModelType,
 		RopeFreqScale: c.RopeFreqScale,
 		NUMA:          c.NUMA,
-		Embeddings:    c.Embeddings,
+		Embeddings:    *c.Embeddings,
 		LowVRAM:       *c.LowVRAM,
 		NGPULayers:    int32(*c.NGPULayers),
 		MMap:          *c.MMap,
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"net"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"math/rand/v2"
|
|
||||||
|
|
||||||
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
cliContext "github.com/mudler/LocalAI/core/cli/context"
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
"github.com/mudler/edgevpn/pkg/node"
|
|
||||||
"github.com/mudler/edgevpn/pkg/protocol"
|
|
||||||
"github.com/mudler/edgevpn/pkg/types"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type FederatedCLI struct {
|
type FederatedCLI struct {
|
||||||
@ -25,106 +14,7 @@ type FederatedCLI struct {
|
|||||||
|
|
||||||
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
|
func (f *FederatedCLI) Run(ctx *cliContext.Context) error {
|
||||||
|
|
||||||
n, err := p2p.NewNode(f.Peer2PeerToken)
|
fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken)
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("creating a new node: %w", err)
|
|
||||||
}
|
|
||||||
err = n.Start(context.Background())
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("creating a new node: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := p2p.ServiceDiscoverer(context.Background(), n, f.Peer2PeerToken, p2p.FederatedID, nil); err != nil {
|
return fs.Start(context.Background())
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return Proxy(context.Background(), n, f.Address, p2p.FederatedID)
|
|
||||||
}
|
|
||||||
|
|
||||||
func Proxy(ctx context.Context, node *node.Node, listenAddr, service string) error {
|
|
||||||
|
|
||||||
log.Info().Msgf("Allocating service '%s' on: %s", service, listenAddr)
|
|
||||||
// Open local port for listening
|
|
||||||
l, err := net.Listen("tcp", listenAddr)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("Error listening")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// ll.Info("Binding local port on", srcaddr)
|
|
||||||
|
|
||||||
ledger, _ := node.Ledger()
|
|
||||||
|
|
||||||
// Announce ourselves so nodes accepts our connection
|
|
||||||
ledger.Announce(
|
|
||||||
ctx,
|
|
||||||
10*time.Second,
|
|
||||||
func() {
|
|
||||||
// Retrieve current ID for ip in the blockchain
|
|
||||||
//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
|
|
||||||
// If mismatch, update the blockchain
|
|
||||||
//if !found {
|
|
||||||
updatedMap := map[string]interface{}{}
|
|
||||||
updatedMap[node.Host().ID().String()] = &types.User{
|
|
||||||
PeerID: node.Host().ID().String(),
|
|
||||||
Timestamp: time.Now().String(),
|
|
||||||
}
|
|
||||||
ledger.Add(protocol.UsersLedgerKey, updatedMap)
|
|
||||||
// }
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
defer l.Close()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return errors.New("context canceled")
|
|
||||||
default:
|
|
||||||
log.Debug().Msg("New for connection")
|
|
||||||
// Listen for an incoming connection.
|
|
||||||
conn, err := l.Accept()
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println("Error accepting: ", err.Error())
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle connections in a new goroutine, forwarding to the p2p service
|
|
||||||
go func() {
|
|
||||||
var tunnelAddresses []string
|
|
||||||
for _, v := range p2p.GetAvailableNodes(p2p.FederatedID) {
|
|
||||||
if v.IsOnline() {
|
|
||||||
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
|
|
||||||
} else {
|
|
||||||
log.Info().Msgf("Node %s is offline", v.ID)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// open a TCP stream to one of the tunnels
|
|
||||||
// chosen randomly
|
|
||||||
// TODO: optimize this and track usage
|
|
||||||
tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
|
|
||||||
|
|
||||||
tunnelConn, err := net.Dial("tcp", tunnelAddr)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("Error connecting to tunnel")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
|
|
||||||
closer := make(chan struct{}, 2)
|
|
||||||
go copyStream(closer, tunnelConn, conn)
|
|
||||||
go copyStream(closer, conn, tunnelConn)
|
|
||||||
<-closer
|
|
||||||
|
|
||||||
tunnelConn.Close()
|
|
||||||
conn.Close()
|
|
||||||
// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
|
|
||||||
defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
|
|
||||||
io.Copy(dst, src)
|
|
||||||
}
|
}
|
||||||
|
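The refactor above removes the hand-rolled TCP proxy from the CLI: node creation, ledger announcements and stream copying now live behind p2p.NewFederatedServer. A minimal sketch of the resulting entrypoint, assuming only the NewFederatedServer/Start signatures visible in this hunk (the listen address and token below are placeholders):

package main

import (
	"context"
	"log"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	// Same call shape as FederatedCLI.Run above: listen address, service ID, p2p token.
	fs := p2p.NewFederatedServer(":8080", p2p.FederatedID, "example-token")
	if err := fs.Start(context.Background()); err != nil {
		log.Fatal(err)
	}
}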
@@ -119,7 +119,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
 		}

 		log.Info().Msg("Starting P2P server discovery...")
-		if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func() {
+		if err := p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) {
 			var tunnelAddresses []string
 			for _, v := range p2p.GetAvailableNodes("") {
 				if v.IsOnline() {
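The ServiceDiscoverer callback now receives the discovered service ID and node instead of taking no arguments. A hedged sketch of a caller using the new shape; the NodeData accessors (ID, TunnelAddress, IsOnline) and the node type come from code shown elsewhere in this commit, everything else is illustrative:

// discoverWorkers logs every online worker each time discovery fires.
func discoverWorkers(ctx context.Context, n *node.Node, token string) error {
	return p2p.ServiceDiscoverer(ctx, n, token, "", func(serviceID string, _ p2p.NodeData) {
		for _, v := range p2p.GetAvailableNodes("") {
			if v.IsOnline() {
				log.Info().Msgf("service %s: worker %s reachable at %s", serviceID, v.ID, v.TunnelAddress)
			}
		}
	})
}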
@@ -32,7 +32,7 @@ type BackendConfig struct {
 	Threads        *int              `yaml:"threads"`
 	Debug          *bool             `yaml:"debug"`
 	Roles          map[string]string `yaml:"roles"`
-	Embeddings     bool              `yaml:"embeddings"`
+	Embeddings     *bool             `yaml:"embeddings"`
 	Backend        string            `yaml:"backend"`
 	TemplateConfig TemplateConfig    `yaml:"template"`

@@ -338,6 +338,10 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.LowVRAM = &falseV
 	}

+	if cfg.Embeddings == nil {
+		cfg.Embeddings = &falseV
+	}
+
 	// Value passed by the top level are treated as default (no implicit defaults)
 	// defaults are set by the user
 	if ctx == 0 {
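Turning Embeddings into a *bool lets the loader distinguish "omitted from the YAML" from "explicitly false", so SetDefaults only fills the value in when the user said nothing, and gRPCModelOpts can safely dereference it afterwards. A small self-contained illustration of the pattern (not LocalAI's actual types):

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

type backendConfig struct {
	Embeddings *bool `yaml:"embeddings"`
}

func (c *backendConfig) setDefaults() {
	falseV := false
	if c.Embeddings == nil { // only default when the key was absent
		c.Embeddings = &falseV
	}
}

func main() {
	var set, unset backendConfig
	yaml.Unmarshal([]byte("embeddings: true"), &set)
	yaml.Unmarshal([]byte("{}"), &unset)
	set.setDefaults()
	unset.setDefaults()
	fmt.Println(*set.Embeddings, *unset.Embeddings) // true false
}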
@@ -20,6 +20,7 @@ const (
 	ChatML
 	Mistral03
 	Gemma
+	DeepSeek2
 )

 type settingsConfig struct {
@@ -37,6 +38,17 @@ var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConf
 			Completion: "{{.Input}}",
 		},
 	},
+	DeepSeek2: {
+		StopWords: []string{"<|end▁of▁sentence|>"},
+		TemplateConfig: TemplateConfig{
+			ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
+{{ end -}}
+{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
+{{if eq .RoleName "system" -}}{{.Content}}
+{{end -}}`,
+			Chat: "{{.Input -}}\nAssistant: ",
+		},
+	},
 	LLaMa3: {
 		StopWords: []string{"<|eot_id|>"},
 		TemplateConfig: TemplateConfig{
@@ -208,8 +220,11 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
 	qwen2 := arch == "qwen2"
 	phi3 := arch == "phi-3"
 	gemma := strings.HasPrefix(f.Model().Name, "gemma")
+	deepseek2 := arch == "deepseek2"

 	switch {
+	case deepseek2:
+		return DeepSeek2
 	case gemma:
 		return Gemma
 	case llama3:
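The new DeepSeek2 entry renders each chat message through a Go text/template that switches on .RoleName. To see what the template produces, here is a standalone snippet using the ChatMessage string added above; the sample messages are made up, and the joined output is presumably what then flows through the Chat template's trailing "Assistant: " suffix:

package main

import (
	"os"
	"text/template"
)

const deepseek2ChatMessage = `{{if eq .RoleName "user" -}}User: {{.Content }}
{{ end -}}
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
{{if eq .RoleName "system" -}}{{.Content}}
{{end -}}`

type message struct {
	RoleName string
	Content  string
}

func main() {
	tmpl := template.Must(template.New("deepseek2").Parse(deepseek2ChatMessage))
	for _, m := range []message{
		{RoleName: "system", Content: "You are a helpful assistant."},
		{RoleName: "user", Content: "Hello!"},
		{RoleName: "assistant", Content: "Hi, how can I help?"},
	} {
		// Each message is rendered independently against the template.
		tmpl.Execute(os.Stdout, m)
	}
}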
@@ -7,9 +7,10 @@ import (
 	"path/filepath"
 	"strings"

-	"github.com/imdario/mergo"
+	"dario.cat/mergo"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
+	"github.com/mudler/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
@@ -189,6 +190,12 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin

 	galleryFile := filepath.Join(basePath, galleryFileName(name))

+	for _, f := range []string{configFile, galleryFile} {
+		if err := utils.VerifyPath(f, basePath); err != nil {
+			return fmt.Errorf("failed to verify path %s: %w", f, err)
+		}
+	}
+
 	var err error
 	// Delete all the files associated to the model
 	// read the model config
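DeleteModelFromSystem now refuses to touch files that resolve outside the model directory. utils.VerifyPath itself is not part of this diff; a sketch of what such a containment check typically looks like (illustrative only, not the actual implementation):

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// verifyPath returns an error unless path stays inside basePath once resolved.
func verifyPath(path, basePath string) error {
	base, err := filepath.Abs(basePath)
	if err != nil {
		return err
	}
	full, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	if full != base && !strings.HasPrefix(full, base+string(filepath.Separator)) {
		return fmt.Errorf("%q escapes %q", path, basePath)
	}
	return nil
}

func main() {
	fmt.Println(verifyPath("/models/foo.yaml", "/models"))      // <nil>
	fmt.Println(verifyPath("/models/../etc/passwd", "/models")) // error: escapes /models
}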
@@ -6,7 +6,7 @@ import (
 	"os"
 	"path/filepath"

-	"github.com/imdario/mergo"
+	"dario.cat/mergo"
 	lconfig "github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/utils"
@@ -12,6 +12,11 @@ import (
 	"github.com/rs/zerolog/log"
 )

+// JINARerankEndpoint acts like the Jina reranker endpoint (https://jina.ai/reranker/)
+// @Summary Reranks a list of phrases by relevance to a given text query.
+// @Param request body schema.JINARerankRequest true "query params"
+// @Success 200 {object} schema.JINARerankResponse "Response"
+// @Router /v1/rerank [post]
 func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		req := new(schema.JINARerankRequest)
@@ -6,6 +6,11 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 )

+// BackendMonitorEndpoint returns the status of the specified backend
+// @Summary Backend monitor endpoint
+// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
+// @Success 200 {object} proto.StatusResponse "Response"
+// @Router /backend/monitor [get]
 func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {

@@ -23,6 +28,10 @@ func BackendMonitorEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ct
 		}
 	}

+// BackendMonitorEndpoint shuts down the specified backend
+// @Summary Backend monitor endpoint
+// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
+// @Router /backend/shutdown [post]
 func BackendShutdownEndpoint(bm *services.BackendMonitorService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(schema.BackendMonitorRequest)
@@ -9,6 +9,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/rs/zerolog/log"
 )
@@ -33,6 +34,10 @@ func CreateModelGalleryEndpointService(galleries []config.Gallery, modelPath str
 	}
 }

+// GetOpStatusEndpoint returns the job status
+// @Summary Returns the job status
+// @Success 200 {object} gallery.GalleryOpStatus "Response"
+// @Router /models/jobs/{uuid} [get]
 func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		status := mgs.galleryApplier.GetStatus(c.Params("uuid"))
@@ -43,12 +48,21 @@ func (mgs *ModelGalleryEndpointService) GetOpStatusEndpoint() func(c *fiber.Ctx)
 	}
 }

+// GetAllStatusEndpoint returns all the jobs status progress
+// @Summary Returns all the jobs status progress
+// @Success 200 {object} map[string]gallery.GalleryOpStatus "Response"
+// @Router /models/jobs [get]
 func (mgs *ModelGalleryEndpointService) GetAllStatusEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		return c.JSON(mgs.galleryApplier.GetAllStatus())
 	}
 }

+// ApplyModelGalleryEndpoint installs a new model to a LocalAI instance from the model gallery
+// @Summary Install models to LocalAI.
+// @Param request body GalleryModel true "query params"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/apply [post]
 func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		input := new(GalleryModel)
@@ -68,13 +82,15 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 			Galleries: mgs.galleries,
 			ConfigURL: input.ConfigURL,
 		}
-		return c.JSON(struct {
-			ID        string `json:"uuid"`
-			StatusURL string `json:"status"`
-		}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
+		return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
 	}
 }

+// DeleteModelGalleryEndpoint lets delete models from a LocalAI instance
+// @Summary delete models to LocalAI.
+// @Param name path string true "Model name"
+// @Success 200 {object} schema.GalleryResponse "Response"
+// @Router /models/delete/{name} [post]
 func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		modelName := c.Params("name")
@ -89,13 +105,14 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return c.JSON(struct {
|
return c.JSON(schema.GalleryResponse{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
||||||
ID string `json:"uuid"`
|
|
||||||
StatusURL string `json:"status"`
|
|
||||||
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListModelFromGalleryEndpoint list the available models for installation from the active galleries
|
||||||
|
// @Summary List installable models.
|
||||||
|
// @Success 200 {object} []gallery.GalleryModel "Response"
|
||||||
|
// @Router /models/available [get]
|
||||||
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
log.Debug().Msgf("Listing models from galleries: %+v", mgs.galleries)
|
||||||
@ -116,6 +133,10 @@ func (mgs *ModelGalleryEndpointService) ListModelFromGalleryEndpoint() func(c *f
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ListModelGalleriesEndpoint list the available galleries configured in LocalAI
|
||||||
|
// @Summary List all Galleries
|
||||||
|
// @Success 200 {object} []config.Gallery "Response"
|
||||||
|
// @Router /models/galleries [get]
|
||||||
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
|
// NOTE: This is different (and much simpler!) than above! This JUST lists the model galleries that have been loaded, not their contents!
|
||||||
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
@ -128,6 +149,11 @@ func (mgs *ModelGalleryEndpointService) ListModelGalleriesEndpoint() func(c *fib
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AddModelGalleryEndpoint adds a gallery in LocalAI
|
||||||
|
// @Summary Adds a gallery in LocalAI
|
||||||
|
// @Param request body config.Gallery true "Gallery details"
|
||||||
|
// @Success 200 {object} []config.Gallery "Response"
|
||||||
|
// @Router /models/galleries [post]
|
||||||
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input := new(config.Gallery)
|
input := new(config.Gallery)
|
||||||
@ -150,6 +176,11 @@ func (mgs *ModelGalleryEndpointService) AddModelGalleryEndpoint() func(c *fiber.
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoveModelGalleryEndpoint remove a gallery in LocalAI
|
||||||
|
// @Summary removes a gallery from LocalAI
|
||||||
|
// @Param request body config.Gallery true "Gallery details"
|
||||||
|
// @Success 200 {object} []config.Gallery "Response"
|
||||||
|
// @Router /models/galleries [delete]
|
||||||
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
input := new(config.Gallery)
|
input := new(config.Gallery)
|
||||||
@ -165,6 +196,10 @@ func (mgs *ModelGalleryEndpointService) RemoveModelGalleryEndpoint() func(c *fib
|
|||||||
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
|
mgs.galleries = slices.DeleteFunc(mgs.galleries, func(gallery config.Gallery) bool {
|
||||||
return gallery.Name == input.Name
|
return gallery.Name == input.Name
|
||||||
})
|
})
|
||||||
return c.Send(nil)
|
dat, err := json.Marshal(mgs.galleries)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.Send(dat)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
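To illustrate the new response shape, a hedged client sketch that installs a model via `/models/apply` and reads back the job reference. The JSON field names (`uuid`, `status`) follow `schema.GalleryResponse` as used above; the host, model identifier, and exact request body format are placeholders/assumptions rather than something this diff defines.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// galleryResponse mirrors schema.GalleryResponse introduced by this change.
type galleryResponse struct {
	ID        string `json:"uuid"`
	StatusURL string `json:"status"`
}

func main() {
	// Assumption: LocalAI on localhost:8080 and a gallery model addressed by an "id" field.
	payload := []byte(`{"id": "localai@hermes-2-theta-llama-3-8b"}`)

	resp, err := http.Post("http://localhost:8080/models/apply", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var gr galleryResponse
	if err := json.NewDecoder(resp.Body).Decode(&gr); err != nil {
		panic(err)
	}

	// The StatusURL points at /models/jobs/{uuid}, which can be polled for progress.
	fmt.Printf("job %s queued, poll %s for progress\n", gr.ID, gr.StatusURL)
}
```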
@@ -9,8 +9,11 @@ import (
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
 
+// LocalAIMetricsEndpoint returns the metrics endpoint for LocalAI
+// @Summary Prometheus metrics endpoint
+// @Param request body config.Gallery true "Gallery details"
+// @Router /metrics [get]
 func LocalAIMetricsEndpoint() fiber.Handler {
 
 	return adaptor.HTTPHandler(promhttp.Handler())
 }
core/http/endpoints/localai/p2p.go (new file, 28 lines)
@@ -0,0 +1,28 @@
+package localai
+
+import (
+	"github.com/gofiber/fiber/v2"
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/p2p"
+	"github.com/mudler/LocalAI/core/schema"
+)
+
+// ShowP2PNodes returns the P2P Nodes
+// @Summary Returns available P2P nodes
+// @Success 200 {object} []schema.P2PNodesResponse "Response"
+// @Router /api/p2p [get]
+func ShowP2PNodes(c *fiber.Ctx) error {
+	// Render index
+	return c.JSON(schema.P2PNodesResponse{
+		Nodes:          p2p.GetAvailableNodes(""),
+		FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID),
+	})
+}
+
+// ShowP2PToken returns the P2P token
+// @Summary Show the P2P token
+// @Success 200 {string} string "Response"
+// @Router /api/p2p/token [get]
+func ShowP2PToken(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error {
+	return func(c *fiber.Ctx) error { return c.Send([]byte(appConfig.P2PToken)) }
+}
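For context, a minimal sketch of how these handlers could be wired into a Fiber app. The same wiring is applied by the LocalAI route registration later in this diff; the standalone `main` and the empty `ApplicationConfig` here are only for illustration.

```go
package main

import (
	"github.com/gofiber/fiber/v2"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/endpoints/localai"
)

func main() {
	app := fiber.New()
	appConfig := &config.ApplicationConfig{} // assumption: normally built by the application startup code

	// Same handler wiring used by RegisterLocalAIRoutes in this commit.
	app.Get("/api/p2p", localai.ShowP2PNodes)
	app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))

	_ = app.Listen(":8080")
}
```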
@@ -11,6 +11,7 @@ import (
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	model "github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/utils"
@@ -125,6 +126,14 @@ func generateRandomID() int64 {
 	return currentId
 }
 
+// ListAssistantsEndpoint is the OpenAI Assistant API endpoint to list assistents https://platform.openai.com/docs/api-reference/assistants/listAssistants
+// @Summary List available assistents
+// @Param limit query int false "Limit the number of assistants returned"
+// @Param order query string false "Order of assistants returned"
+// @Param after query string false "Return assistants created after the given ID"
+// @Param before query string false "Return assistants created before the given ID"
+// @Success 200 {object} []Assistant "Response"
+// @Router /v1/assistants [get]
 func ListAssistantsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// Because we're altering the existing assistants list we should just duplicate it for now.
@@ -230,13 +239,11 @@ func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelNam
 	return
 }
 
+// DeleteAssistantEndpoint is the OpenAI Assistant API endpoint to delete assistents https://platform.openai.com/docs/api-reference/assistants/deleteAssistant
+// @Summary Delete assistents
+// @Success 200 {object} schema.DeleteAssistantResponse "Response"
+// @Router /v1/assistants/{assistant_id} [delete]
 func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type DeleteAssistantResponse struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Deleted bool   `json:"deleted"`
-	}
-
 	return func(c *fiber.Ctx) error {
 		assistantID := c.Params("assistant_id")
 		if assistantID == "" {
@@ -247,7 +254,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 			if assistant.ID == assistantID {
 				Assistants = append(Assistants[:i], Assistants[i+1:]...)
 				utils.SaveConfig(appConfig.ConfigsDir, AssistantsConfigFile, Assistants)
-				return c.Status(fiber.StatusOK).JSON(DeleteAssistantResponse{
+				return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantResponse{
 					ID:      assistantID,
 					Object:  "assistant.deleted",
 					Deleted: true,
@@ -256,7 +263,7 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 		}
 
 		log.Warn().Msgf("Unable to find assistant %s for deletion", assistantID)
-		return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantResponse{
+		return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantResponse{
 			ID:      assistantID,
 			Object:  "assistant.deleted",
 			Deleted: false,
@@ -264,6 +271,10 @@ func DeleteAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
 	}
 }
 
+// GetAssistantEndpoint is the OpenAI Assistant API endpoint to get assistents https://platform.openai.com/docs/api-reference/assistants/getAssistant
+// @Summary Get assistent data
+// @Success 200 {object} Assistant "Response"
+// @Router /v1/assistants/{assistant_id} [get]
 func GetAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		assistantID := c.Params("assistant_id")
@@ -293,19 +304,9 @@ var (
 	AssistantsFileConfigFile = "assistantsFile.json"
 )
 
-type AssistantFileRequest struct {
-	FileID string `json:"file_id"`
-}
-
-type DeleteAssistantFileResponse struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Deleted bool   `json:"deleted"`
-}
-
 func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
-		request := new(AssistantFileRequest)
+		request := new(schema.AssistantFileRequest)
 		if err := c.BodyParser(request); err != nil {
 			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"})
 		}
@@ -346,7 +347,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 
 func ListAssistantFilesEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	type ListAssistantFiles struct {
-		Data   []File
+		Data   []schema.File
 		Object string
 	}
 
@@ -464,7 +465,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 				// Remove the file from the assistantFiles slice
 				AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
 				utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
-				return c.Status(fiber.StatusOK).JSON(DeleteAssistantFileResponse{
+				return c.Status(fiber.StatusOK).JSON(schema.DeleteAssistantFileResponse{
 					ID:      fileId,
 					Object:  "assistant.file.deleted",
 					Deleted: true,
@@ -480,7 +481,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 					AssistantFiles = append(AssistantFiles[:i], AssistantFiles[i+1:]...)
 					utils.SaveConfig(appConfig.ConfigsDir, AssistantsFileConfigFile, AssistantFiles)
 
-					return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
+					return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
 						ID:      fileId,
 						Object:  "assistant.file.deleted",
 						Deleted: true,
@@ -491,7 +492,7 @@ func DeleteAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 		}
 		log.Warn().Msgf("Unable to find assistant: %s", assistantID)
 
-		return c.Status(fiber.StatusNotFound).JSON(DeleteAssistantFileResponse{
+		return c.Status(fiber.StatusNotFound).JSON(schema.DeleteAssistantFileResponse{
 			ID:      fileId,
 			Object:  "assistant.file.deleted",
 			Deleted: false,
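As a usage sketch, deleting an assistant over HTTP and decoding the response now shaped by `schema.DeleteAssistantResponse` (field names `id`, `object`, `deleted`, per the schema added in this commit). The host and assistant ID are placeholders.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// deleteAssistantResponse mirrors schema.DeleteAssistantResponse.
type deleteAssistantResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Deleted bool   `json:"deleted"`
}

func main() {
	req, _ := http.NewRequest(http.MethodDelete, "http://localhost:8080/v1/assistants/asst_123", nil)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out deleteAssistantResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("deleted=%v object=%s id=%s\n", out.Deleted, out.Object, out.ID)
}
```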
@@ -14,6 +14,7 @@ import (
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/stretchr/testify/assert"
 )
@@ -26,7 +27,7 @@ type MockLoader struct {
 
 func tearDown() func() {
 	return func() {
-		UploadedFiles = []File{}
+		UploadedFiles = []schema.File{}
 		Assistants = []Assistant{}
 		AssistantFiles = []AssistantFile{}
 		_ = os.Remove(filepath.Join(configsDir, AssistantsConfigFile))
@@ -294,7 +295,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		file, assistant, err := createFileAndAssistant(t, app, appConfig)
 		assert.NoError(t, err)
 
-		afr := AssistantFileRequest{FileID: file.ID}
+		afr := schema.AssistantFileRequest{FileID: file.ID}
 		af, _, err := createAssistantFile(app, afr, assistant.ID)
 
 		assert.NoError(t, err)
@@ -305,7 +306,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		file, assistant, err := createFileAndAssistant(t, app, appConfig)
 		assert.NoError(t, err)
 
-		afr := AssistantFileRequest{FileID: file.ID}
+		afr := schema.AssistantFileRequest{FileID: file.ID}
 		af, _, err := createAssistantFile(app, afr, assistant.ID)
 		assert.NoError(t, err)
 
@@ -316,7 +317,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		file, assistant, err := createFileAndAssistant(t, app, appConfig)
 		assert.NoError(t, err)
 
-		afr := AssistantFileRequest{FileID: file.ID}
+		afr := schema.AssistantFileRequest{FileID: file.ID}
 		af, _, err := createAssistantFile(app, afr, assistant.ID)
 		assert.NoError(t, err)
 		t.Cleanup(cleanupAssistantFile(t, app, af.ID, af.AssistantID))
@@ -338,7 +339,7 @@ func TestAssistantEndpoints(t *testing.T) {
 		file, assistant, err := createFileAndAssistant(t, app, appConfig)
 		assert.NoError(t, err)
 
-		afr := AssistantFileRequest{FileID: file.ID}
+		afr := schema.AssistantFileRequest{FileID: file.ID}
 		af, _, err := createAssistantFile(app, afr, assistant.ID)
 		assert.NoError(t, err)
 
@@ -349,7 +350,7 @@ func TestAssistantEndpoints(t *testing.T) {
 
 }
 
-func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (File, Assistant, error) {
+func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationConfig) (schema.File, Assistant, error) {
 	ar := &AssistantRequest{
 		Model: "ggml-gpt4all-j",
 		Name:  "3.5-turbo",
@@ -362,7 +363,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
 
 	assistant, _, err := createAssistant(app, *ar)
 	if err != nil {
-		return File{}, Assistant{}, err
+		return schema.File{}, Assistant{}, err
 	}
 	t.Cleanup(cleanupAllAssistants(t, app, []string{assistant.ID}))
 
@@ -374,7 +375,7 @@ func createFileAndAssistant(t *testing.T, app *fiber.App, o *config.ApplicationC
 	return file, assistant, nil
 }
 
-func createAssistantFile(app *fiber.App, afr AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
+func createAssistantFile(app *fiber.App, afr schema.AssistantFileRequest, assistantId string) (AssistantFile, *http.Response, error) {
 	afrJson, err := json.Marshal(afr)
 	if err != nil {
 		return AssistantFile{}, nil, err
@@ -451,7 +452,7 @@ func cleanupAssistantFile(t *testing.T, app *fiber.App, fileId, assistantId stri
 	resp, err := app.Test(request)
 	assert.NoError(t, err)
 
-	var dafr DeleteAssistantFileResponse
+	var dafr schema.DeleteAssistantFileResponse
 	err = json.NewDecoder(resp.Body).Decode(&dafr)
 	assert.NoError(t, err)
 	assert.True(t, dafr.Deleted)
@@ -225,18 +225,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		}
 
 		// Update input grammar
-		// Handle if we should return "name" instead of "functions"
-		if config.FunctionsConfig.FunctionName {
-			jsStruct := funcs.ToJSONNameStructure()
-			config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-		} else {
-			jsStruct := funcs.ToJSONFunctionStructure()
-			config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-		}
+		jsStruct := funcs.ToJSONStructure(config.FunctionsConfig.FunctionNameKey, config.FunctionsConfig.FunctionNameKey)
+		config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 	case input.JSONFunctionGrammarObject != nil:
 		config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
-	case input.JSONFunctionGrammarObjectName != nil:
-		config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 	default:
 		// Force picking one of the functions by the request
 		if config.FunctionToCall() != "" {
@@ -16,6 +16,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// EditEndpoint is the OpenAI edit API endpoint
+// @Summary OpenAI edit endpoint
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/edits [post]
 func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
@@ -9,25 +9,16 @@ import (
 	"time"
 
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
+
 	"github.com/gofiber/fiber/v2"
 	"github.com/mudler/LocalAI/pkg/utils"
 )
 
-var UploadedFiles []File
+var UploadedFiles []schema.File
 
 const UploadedFilesFile = "uploadedFiles.json"
 
-// File represents the structure of a file object from the OpenAI API.
-type File struct {
-	ID        string    `json:"id"`         // Unique identifier for the file
-	Object    string    `json:"object"`     // Type of the object (e.g., "file")
-	Bytes     int       `json:"bytes"`      // Size of the file in bytes
-	CreatedAt time.Time `json:"created_at"` // The time at which the file was created
-	Filename  string    `json:"filename"`   // The name of the file
-	Purpose   string    `json:"purpose"`    // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
-}
-
 // UploadFilesEndpoint https://platform.openai.com/docs/api-reference/files/create
 func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
@@ -61,7 +52,7 @@ func UploadFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 			return c.Status(fiber.StatusInternalServerError).SendString("Failed to save file: " + err.Error())
 		}
 
-		f := File{
+		f := schema.File{
 			ID:     fmt.Sprintf("file-%d", getNextFileId()),
 			Object: "file",
 			Bytes:  int(file.Size),
@@ -84,14 +75,13 @@ func getNextFileId() int64 {
 }
 
 // ListFilesEndpoint https://platform.openai.com/docs/api-reference/files/list
+// @Summary List files.
+// @Success 200 {object} schema.ListFiles "Response"
+// @Router /v1/files [get]
 func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type ListFiles struct {
-		Data   []File
-		Object string
-	}
-
 	return func(c *fiber.Ctx) error {
-		var listFiles ListFiles
+		var listFiles schema.ListFiles
 
 		purpose := c.Query("purpose")
 		if purpose == "" {
@@ -108,7 +98,7 @@ func ListFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applica
 	}
 }
 
-func getFileFromRequest(c *fiber.Ctx) (*File, error) {
+func getFileFromRequest(c *fiber.Ctx) (*schema.File, error) {
 	id := c.Params("file_id")
 	if id == "" {
 		return nil, fmt.Errorf("file_id parameter is required")
@@ -125,7 +115,7 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
 
 // GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
 // @Summary Returns information about a specific file.
-// @Success 200 {object} File "Response"
+// @Success 200 {object} schema.File "Response"
 // @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
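A hedged client sketch of the `/v1/files` listing documented above. The local structs mirror `schema.File` and `schema.ListFiles` as moved into the schema package by this change; note that `Data` and `Object` carry no JSON tags in the schema, so Go's default field names appear on the wire. Host and purpose filter are placeholders.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// Mirrors schema.File.
type file struct {
	ID        string    `json:"id"`
	Object    string    `json:"object"`
	Bytes     int       `json:"bytes"`
	CreatedAt time.Time `json:"created_at"`
	Filename  string    `json:"filename"`
	Purpose   string    `json:"purpose"`
}

// Mirrors schema.ListFiles (untagged fields, so "Data"/"Object" keys).
type listFiles struct {
	Data   []file
	Object string
}

func main() {
	resp, err := http.Get("http://localhost:8080/v1/files?purpose=fine-tune")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var lf listFiles
	if err := json.NewDecoder(resp.Body).Decode(&lf); err != nil {
		panic(err)
	}
	for _, f := range lf.Data {
		fmt.Println(f.ID, f.Filename, f.Bytes)
	}
}
```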
@@ -14,6 +14,7 @@ import (
 	"github.com/rs/zerolog/log"
 
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/schema"
 
 	"github.com/gofiber/fiber/v2"
 	utils2 "github.com/mudler/LocalAI/pkg/utils"
@@ -22,11 +23,6 @@ import (
 	"testing"
 )
 
-type ListFiles struct {
-	Data   []File
-	Object string
-}
-
 func startUpApp() (app *fiber.App, option *config.ApplicationConfig, loader *config.BackendConfigLoader) {
 	// Preparing the mocked objects
 	loader = &config.BackendConfigLoader{}
@@ -159,7 +155,7 @@ func TestUploadFileExceedSizeLimit(t *testing.T) {
 	resp, _ := app.Test(req)
 	assert.Equal(t, 200, resp.StatusCode)
 
-	var listFiles ListFiles
+	var listFiles schema.ListFiles
 	if err := json.Unmarshal(bodyToByteArray(resp, t), &listFiles); err != nil {
 		t.Errorf("Failed to decode response: %v", err)
 		return
@@ -201,7 +197,7 @@ func CallFilesUploadEndpoint(t *testing.T, app *fiber.App, fileName, tag, purpos
 	return app.Test(req)
 }
 
-func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) File {
+func CallFilesUploadEndpointWithCleanup(t *testing.T, app *fiber.App, fileName, tag, purpose string, fileSize int, appConfig *config.ApplicationConfig) schema.File {
 	// Create a file that exceeds the limit
 	testName := strings.Split(t.Name(), "/")[1]
 	file := createTestFile(t, testName+"-"+fileName, fileSize, appConfig)
@@ -280,8 +276,8 @@ func bodyToByteArray(resp *http.Response, t *testing.T) []byte {
 	return bodyBytes
 }
 
-func responseToFile(t *testing.T, resp *http.Response) File {
-	var file File
+func responseToFile(t *testing.T, resp *http.Response) schema.File {
+	var file schema.File
 	responseToString := bodyToString(resp, t)
 
 	err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&file)
@@ -292,8 +288,8 @@ func responseToFile(t *testing.T, resp *http.Response) File {
 	return file
 }
 
-func responseToListFile(t *testing.T, resp *http.Response) ListFiles {
-	var listFiles ListFiles
+func responseToListFile(t *testing.T, resp *http.Response) schema.ListFiles {
+	var listFiles schema.ListFiles
 	responseToString := bodyToString(resp, t)
 
 	err := json.NewDecoder(strings.NewReader(responseToString)).Decode(&listFiles)
@@ -59,16 +59,8 @@ func RegisterLocalAIRoutes(app *fiber.App,
 
 	// p2p
 	if p2p.IsP2PEnabled() {
-		app.Get("/api/p2p", auth, func(c *fiber.Ctx) error {
-			// Render index
-			return c.JSON(map[string]interface{}{
-				"Nodes":          p2p.GetAvailableNodes(""),
-				"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID),
-			})
-		})
-		app.Get("/api/p2p/token", auth, func(c *fiber.Ctx) error {
-			return c.Send([]byte(appConfig.P2PToken))
-		})
+		app.Get("/api/p2p", auth, localai.ShowP2PNodes)
+		app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
 	}
 
 	app.Get("/version", auth, func(c *fiber.Ctx) error {
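To illustrate the simplified routes, a small client sketch that queries `/api/p2p` and decodes the `schema.P2PNodesResponse` shape introduced in this commit (JSON keys `nodes` and `federated_nodes`). The node objects are decoded generically because their concrete fields live in `p2p.NodeData`; the host is a placeholder and the routes are only registered when LocalAI is built with p2p support.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/api/p2p")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Decode generically; concrete node fields are defined by p2p.NodeData.
	var out struct {
		Nodes          []map[string]any `json:"nodes"`
		FederatedNodes []map[string]any `json:"federated_nodes"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("%d worker nodes, %d federated nodes\n", len(out.Nodes), len(out.FederatedNodes))
}
```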
core/p2p/federated.go (new file, 15 lines)
@@ -0,0 +1,15 @@
+package p2p
+
+const FederatedID = "federated"
+
+type FederatedServer struct {
+	listenAddr, service, p2ptoken string
+}
+
+func NewFederatedServer(listenAddr, service, p2pToken string) *FederatedServer {
+	return &FederatedServer{
+		listenAddr: listenAddr,
+		service:    service,
+		p2ptoken:   p2pToken,
+	}
+}
core/p2p/federated_server.go (new file, 127 lines)
@@ -0,0 +1,127 @@
+//go:build p2p
+// +build p2p
+
+package p2p
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net"
+	"time"
+
+	"math/rand/v2"
+
+	"github.com/mudler/edgevpn/pkg/node"
+	"github.com/mudler/edgevpn/pkg/protocol"
+	"github.com/mudler/edgevpn/pkg/types"
+	"github.com/rs/zerolog/log"
+)
+
+func (f *FederatedServer) Start(ctx context.Context) error {
+
+	n, err := NewNode(f.p2ptoken)
+	if err != nil {
+		return fmt.Errorf("creating a new node: %w", err)
+	}
+	err = n.Start(ctx)
+	if err != nil {
+		return fmt.Errorf("creating a new node: %w", err)
+	}
+
+	if err := ServiceDiscoverer(ctx, n, f.p2ptoken, f.service, func(servicesID string, tunnel NodeData) {
+		log.Debug().Msgf("Discovered node: %s", tunnel.ID)
+	}); err != nil {
+		return err
+	}
+
+	return f.proxy(ctx, n)
+}
+
+func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
+
+	log.Info().Msgf("Allocating service '%s' on: %s", fs.service, fs.listenAddr)
+	// Open local port for listening
+	l, err := net.Listen("tcp", fs.listenAddr)
+	if err != nil {
+		log.Error().Err(err).Msg("Error listening")
+		return err
+	}
+	// ll.Info("Binding local port on", srcaddr)
+
+	ledger, _ := node.Ledger()
+
+	// Announce ourselves so nodes accepts our connection
+	ledger.Announce(
+		ctx,
+		10*time.Second,
+		func() {
+			// Retrieve current ID for ip in the blockchain
+			//_, found := ledger.GetKey(protocol.UsersLedgerKey, node.Host().ID().String())
+			// If mismatch, update the blockchain
+			//if !found {
+			updatedMap := map[string]interface{}{}
+			updatedMap[node.Host().ID().String()] = &types.User{
+				PeerID:    node.Host().ID().String(),
+				Timestamp: time.Now().String(),
+			}
+			ledger.Add(protocol.UsersLedgerKey, updatedMap)
+			// }
+		},
+	)
+
+	defer l.Close()
+	for {
+		select {
+		case <-ctx.Done():
+			return errors.New("context canceled")
+		default:
+			log.Debug().Msg("New for connection")
+			// Listen for an incoming connection.
+			conn, err := l.Accept()
+			if err != nil {
+				fmt.Println("Error accepting: ", err.Error())
+				continue
+			}
+
+			// Handle connections in a new goroutine, forwarding to the p2p service
+			go func() {
+				var tunnelAddresses []string
+				for _, v := range GetAvailableNodes(fs.service) {
+					if v.IsOnline() {
+						tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
+					} else {
+						log.Info().Msgf("Node %s is offline", v.ID)
+					}
+				}
+
+				if len(tunnelAddresses) == 0 {
+					log.Error().Msg("No available nodes yet")
+					return
+				}
+
+				// open a TCP stream to one of the tunnels
+				// chosen randomly
+				// TODO: optimize this and track usage
+				tunnelAddr := tunnelAddresses[rand.IntN(len(tunnelAddresses))]
+
+				tunnelConn, err := net.Dial("tcp", tunnelAddr)
+				if err != nil {
+					log.Error().Err(err).Msg("Error connecting to tunnel")
+					return
+				}
+
+				log.Info().Msgf("Redirecting %s to %s", conn.LocalAddr().String(), tunnelConn.RemoteAddr().String())
+				closer := make(chan struct{}, 2)
+				go copyStream(closer, tunnelConn, conn)
+				go copyStream(closer, conn, tunnelConn)
+				<-closer
+
+				tunnelConn.Close()
+				conn.Close()
+				// ll.Infof("(service %s) Done handling %s", serviceID, l.Addr().String())
+			}()
+		}
+	}
+
+}
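Putting the new type together, a hedged sketch of how a federated proxy could be started using the constructor from `federated.go` and the `Start` method above. The token and listen address are placeholders, and this only builds when the `p2p` tag is enabled.

```go
//go:build p2p

package main

import (
	"context"

	"github.com/mudler/LocalAI/core/p2p"
)

func main() {
	ctx := context.Background()

	// Placeholder: the real token is the one exposed by a LocalAI instance (see /api/p2p/token).
	token := "shared-p2p-token"

	// Listen locally and proxy incoming connections to discovered federated nodes.
	fs := p2p.NewFederatedServer("0.0.0.0:8080", p2p.FederatedID, token)
	if err := fs.Start(ctx); err != nil { // blocks, accepting and forwarding connections
		panic(err)
	}
}
```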
@@ -6,7 +6,6 @@ import (
 )
 
 const defaultServicesID = "services_localai"
-const FederatedID = "federated"
 
 type NodeData struct {
 	Name string
@@ -137,14 +137,9 @@ func allocateLocalService(ctx context.Context, node *node.Node, listenAddr, serv
 
 }
 
-func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
-	defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
-	io.Copy(dst, src)
-}
-
 // This is the main of the server (which keeps the env variable updated)
 // This starts a goroutine that keeps LLAMACPP_GRPC_SERVERS updated with the discovered services
-func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func()) error {
+func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID string, discoveryFunc func(serviceID string, node NodeData)) error {
 	if servicesID == "" {
 		servicesID = defaultServicesID
 	}
@@ -166,7 +161,7 @@ func ServiceDiscoverer(ctx context.Context, n *node.Node, token, servicesID stri
 			case tunnel := <-tunnels:
 				AddNode(servicesID, tunnel)
 				if discoveryFunc != nil {
-					discoveryFunc()
+					discoveryFunc(servicesID, tunnel)
 				}
 			}
 		}
@@ -396,3 +391,8 @@ func newNodeOpts(token string) ([]node.Option, error) {
 
 	return nodeOpts, nil
 }
+
+func copyStream(closer chan struct{}, dst io.Writer, src io.Reader) {
+	defer func() { closer <- struct{}{} }() // connection is closed, send signal to stop proxy
+	io.Copy(dst, src)
+}
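The signature change above means discovery callbacks now receive the service ID and the discovered node. A hedged caller sketch under the `p2p` build tag (token is a placeholder; the program runs as a long-lived daemon):

```go
//go:build p2p

package main

import (
	"context"

	"github.com/mudler/LocalAI/core/p2p"
	"github.com/rs/zerolog/log"
)

func main() {
	ctx := context.Background()
	token := "shared-p2p-token" // placeholder

	n, err := p2p.NewNode(token)
	if err != nil {
		log.Fatal().Err(err).Msg("creating node")
	}
	if err := n.Start(ctx); err != nil {
		log.Fatal().Err(err).Msg("starting node")
	}

	// An empty servicesID falls back to the package default ("services_localai").
	if err := p2p.ServiceDiscoverer(ctx, n, token, "", func(serviceID string, node p2p.NodeData) {
		log.Info().Msgf("discovered %s on service %s", node.ID, serviceID)
	}); err != nil {
		log.Fatal().Err(err).Msg("discovery")
	}

	<-ctx.Done() // keep the discoverer goroutine running
}
```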
@@ -14,7 +14,11 @@ func GenerateToken() string {
 	return "not implemented"
 }
 
-func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func()) error {
+func (f *FederatedServer) Start(ctx context.Context) error {
+	return fmt.Errorf("not implemented")
+}
+
+func ServiceDiscoverer(ctx context.Context, node *node.Node, token, servicesID string, fn func(string, NodeData)) error {
 	return fmt.Errorf("not implemented")
 }
 
@@ -1,6 +1,7 @@
 package schema
 
 import (
+	"github.com/mudler/LocalAI/core/p2p"
 	gopsutil "github.com/shirou/gopsutil/v3/process"
 )
 
@@ -14,6 +15,11 @@ type BackendMonitorResponse struct {
 	CPUPercent float64
 }
 
+type GalleryResponse struct {
+	ID        string `json:"uuid"`
+	StatusURL string `json:"status"`
+}
+
 // @Description TTS request body
 type TTSRequest struct {
 	Model string `json:"model" yaml:"model"` // model name or full path
@@ -59,3 +65,8 @@ type StoresFindResponse struct {
 	Values       []string  `json:"values" yaml:"values"`
 	Similarities []float32 `json:"similarities" yaml:"similarities"`
 }
+
+type P2PNodesResponse struct {
+	Nodes          []p2p.NodeData `json:"nodes" yaml:"nodes"`
+	FederatedNodes []p2p.NodeData `json:"federated_nodes" yaml:"federated_nodes"`
+}
@@ -2,6 +2,7 @@ package schema
 
 import (
 	"context"
+	"time"
 
 	functions "github.com/mudler/LocalAI/pkg/functions"
 )
@@ -99,6 +100,37 @@ type OpenAIModel struct {
 	Object string `json:"object"`
 }
 
+type DeleteAssistantResponse struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Deleted bool   `json:"deleted"`
+}
+
+// File represents the structure of a file object from the OpenAI API.
+type File struct {
+	ID        string    `json:"id"`         // Unique identifier for the file
+	Object    string    `json:"object"`     // Type of the object (e.g., "file")
+	Bytes     int       `json:"bytes"`      // Size of the file in bytes
+	CreatedAt time.Time `json:"created_at"` // The time at which the file was created
+	Filename  string    `json:"filename"`   // The name of the file
+	Purpose   string    `json:"purpose"`    // The purpose of the file (e.g., "fine-tune", "classifications", etc.)
+}
+
+type ListFiles struct {
+	Data   []File
+	Object string
+}
+
+type AssistantFileRequest struct {
+	FileID string `json:"file_id"`
+}
+
+type DeleteAssistantFileResponse struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Deleted bool   `json:"deleted"`
+}
+
 type ImageGenerationResponseFormat string
 
 type ChatCompletionResponseFormatType string
@@ -147,8 +179,7 @@ type OpenAIRequest struct {
 	// A grammar to constrain the LLM output
 	Grammar string `json:"grammar" yaml:"grammar"`
 
-	JSONFunctionGrammarObject     *functions.JSONFunctionStructureFunction `json:"grammar_json_functions" yaml:"grammar_json_functions"`
-	JSONFunctionGrammarObjectName *functions.JSONFunctionStructureName     `json:"grammar_json_name" yaml:"grammar_json_name"`
+	JSONFunctionGrammarObject *functions.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
 
 	Backend string `json:"backend" yaml:"backend"`
 
@@ -9,7 +9,7 @@ import (
 	"time"
 
 	"github.com/fsnotify/fsnotify"
-	"github.com/imdario/mergo"
+	"dario.cat/mergo"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/rs/zerolog/log"
 )
@@ -112,6 +112,8 @@ name: "" # Model name, used to identify the model in API calls.
 # Precision settings for the model, reducing precision can enhance performance on some hardware.
 f16: null # Whether to use 16-bit floating-point precision.
 
+embeddings: true # Enable embeddings for the model.
+
 # Concurrency settings for the application.
 threads: null # Number of threads to use for processing.
 
@@ -150,7 +152,8 @@ function:
   replace_function_results: [] # Placeholder to replace function call results with arbitrary strings or patterns.
   replace_llm_results: [] # Replace language model results with arbitrary strings or patterns.
   capture_llm_results: [] # Capture language model results as text result, among JSON, in function calls. For instance, if a model returns a block for "thinking" and a block for "response", this will allow you to capture the thinking block.
-  return_name_in_function_response: false # Some models might prefer to use "name" rather then "function" when returning JSON data. This will allow to use "name" as a key in the JSON response.
+  function_name_key: "name"
+  function_arguments_key: "arguments"
 
 # Feature gating flags to enable experimental or optional features.
 feature_flags: {}
@@ -8,9 +8,9 @@ icon = "rocket_launch"
 
 ## Running other models
 
-> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
+> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_.
 
-To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/manual" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
+To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
 
 To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.
 
@@ -16,6 +16,10 @@ Here are answers to some of the most common questions.
 
 Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.
 
+### Benchmarking LocalAI and llama.cpp shows different results!
+
+LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780).
+
 ### What's the difference with Serge, or XXX?
 
 LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.
@@ -55,8 +55,8 @@ apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-to
 After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands
 
 ```bash
-go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
-go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
+go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
 ```
 
@ -1,21 +1,69 @@

---
disableToc: false
title: "Run models manually"
weight: 5
icon: "rocket_launch"
---

# Run Models Manually

+++
disableToc = false
title = "Install and Run Models"
weight = 4
icon = "rocket_launch"
+++

To install models with LocalAI, you can:

- Browse the Model Gallery from the Web Interface and install models with a couple of clicks. For more details, refer to the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}).
- Specify a model from the LocalAI gallery during startup, e.g., `local-ai run <model_gallery_name>`.
- Use a URI to specify a model file (e.g., `huggingface://...`, `oci://`, or `ollama://`) when starting LocalAI, e.g., `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`.
- Specify a URL to a model configuration file when starting LocalAI, e.g., `local-ai run https://gist.githubusercontent.com/.../phi-2.yaml`.
- Manually install the models by copying the files into the models directory (`--models`).

## Run and Install Models via the Gallery

To run models available in the LocalAI gallery, you can use the WebUI or specify the model name when starting LocalAI. Models can be found in the gallery via the Web interface, the [model gallery](https://models.localai.io), or the CLI with: `local-ai models list`.

To install a model from the gallery, use the model name as the URI. For example, to run LocalAI with the Hermes model, execute:

```bash
local-ai run hermes-2-theta-llama-3-8b
```

To install only the model, use:

```bash
local-ai models install hermes-2-theta-llama-3-8b
```

Note: The galleries available in LocalAI can be customized to point to a different URL or a local directory. For more information on how to set up your own gallery, see the [Gallery Documentation]({{% relref "docs/features/model-gallery" %}}).
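
As an illustrative sketch only, one way to point LocalAI at a custom gallery index is the `GALLERIES` environment variable, described in the Gallery documentation as a JSON list of name/URL pairs; the variable shape and the URL below are assumptions to verify there:

```bash
# Hypothetical gallery override - the index URL is a placeholder.
GALLERIES='[{"name":"my-gallery","url":"https://example.com/index.yaml"}]' \
  local-ai run
```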

## Run Models via URI

To run models via URI, specify a URI to a model file or a configuration file when starting LocalAI. Valid syntax includes:

- `file://path/to/model`
- `huggingface://repository_id/model_file` (e.g., `huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`)
- From OCIs: `oci://container_image:tag`, `ollama://model_id:tag`
- From configuration files: `https://gist.githubusercontent.com/.../phi-2.yaml`

Configuration files can be used to customize the model defaults and settings. For advanced configurations, refer to the [Customize Models section]({{% relref "docs/getting-started/customize-model" %}}).
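
To give a rough idea of what such a file contains, the sketch below writes a minimal model configuration; the model name, file name, and parameter values are placeholders, and the Customize Models section documents the full set of options:

```bash
# Minimal, illustrative model configuration - adjust names and values to your model.
cat <<EOF > models/phi-2.yaml
name: phi-2
context_size: 2048
parameters:
  model: phi-2.Q8_0.gguf
  temperature: 0.7
EOF
```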

### Examples

```bash
# Start LocalAI with the phi-2 model
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
```

## Run Models Manually

Follow these steps to manually run models using LocalAI:

1. **Prepare Your Model and Configuration Files**:
   Ensure you have a model file and a configuration YAML file, if necessary. Customize model defaults and specific settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "docs/advanced" %}}).
   Ensure you have a model file and, if necessary, a configuration YAML file. Customize model defaults and settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation]({{% relref "docs/advanced" %}}).

2. **GPU Acceleration**:
   For instructions on GPU acceleration, visit the [GPU acceleration]({{% relref "docs/features/gpu-acceleration" %}}) page.
   For instructions on GPU acceleration, visit the [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}) page.

3. **Run LocalAI**:
   Choose one of the following methods to run LocalAI:

@ -160,5 +208,3 @@ For instructions on building LocalAI from source, see the [Build Section]({{% re

{{< /tabs >}}

For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).

---

@ -38,13 +38,13 @@ For detailed instructions, see [Using container images]({{% relref "docs/getting

## Running LocalAI with All-in-One (AIO) Images

> _Already have a model file? Skip to [Run models manually]({{% relref "docs/getting-started/manual" %}})_.
> _Already have a model file? Skip to [Run models manually]({{% relref "docs/getting-started/models" %}})_.

LocalAI's All-in-One (AIO) images are pre-configured with a set of models and backends to fully leverage almost all the features of LocalAI. If pre-configured models are not required, you can use the standard [images]({{% relref "docs/getting-started/container-images" %}}).

These images are available for both CPU and GPU environments. AIO images are designed for ease of use and require no additional configuration.
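As a quick sketch (verify the image tag against the container images documentation for your release and hardware), starting the CPU AIO image with Docker could look like:

```bash
# Illustrative: run the CPU All-in-One image and expose the API on port 8080.
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu
```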

It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "docs/getting-started/manual" %}}).
It is recommended to use AIO images if you prefer not to configure the models manually or via the web interface. For running specific models, refer to the [manual method]({{% relref "docs/getting-started/models" %}}).

The AIO images come pre-configured with the following features:
- Text to Speech (TTS)

@ -66,5 +66,5 @@ Explore additional resources and community contributions:

- [Run from Container images]({{% relref "docs/getting-started/container-images" %}})
- [Examples to try from the CLI]({{% relref "docs/getting-started/try-it-out" %}})
- [Build LocalAI and the container image]({{% relref "docs/getting-started/build" %}})
- [Run models manually]({{% relref "docs/getting-started/manual" %}})
- [Run models manually]({{% relref "docs/getting-started/models" %}})
- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)

@ -17,10 +17,10 @@ After installation, install new models by navigating the model gallery, or by us

To install models with the WebUI, see the [Models section]({{%relref "docs/features/model-gallery" %}}).
With the CLI you can list the models with `local-ai models list` and install them with `local-ai models install <model-name>`.

You can also [run models manually]({{%relref "docs/getting-started/manual" %}}) by copying files into the `models` directory.
You can also [run models manually]({{%relref "docs/getting-started/models" %}}) by copying files into the `models` directory.

{{% /alert %}}

You can test out the API endpoints using `curl`, few examples are listed below. The models we are refering here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images - you can also use any other model you have installed.
You can test out the API endpoints using `curl`; a few examples are listed below. The models we are referring to here (`gpt-4`, `gpt-4-vision-preview`, `tts-1`, `whisper-1`) are the default models that come with the AIO images, but you can also use any other model you have installed.
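
For instance, a chat completion request against the OpenAI-compatible endpoint might look like the following sketch, assuming LocalAI is listening on the default port 8080:

```bash
# Assumes LocalAI is reachable on localhost:8080 (the default).
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4",
    "messages": [{"role": "user", "content": "How are you doing?"}],
    "temperature": 0.1
  }'
```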
### Text Generation
@ -193,4 +193,4 @@ Don't use the model file as `model` in the request unless you want to handle the

Use the model names as you would with OpenAI, as in the examples below. For instance, `gpt-4-vision-preview` or `gpt-4`.

{{% /alert %}}
2
docs/themes/hugo-theme-relearn
vendored

@ -1 +1 @@

Subproject commit c25bc2a27ab46649393ef7b310e14fff1311116d
Subproject commit 1b2e139512106f8074ac7d4a884135d159720cc4

@ -1,5 +1,5 @@

# Use an official Python runtime as a parent image
FROM harbor.home.sfxworks.net/docker/library/python:3.9-slim
FROM python:3.12-slim

# Set the working directory in the container
WORKDIR /app

@ -7,8 +7,17 @@ WORKDIR /app

# Copy the current directory contents into the container at /app
COPY requirements.txt /app

# Install c++ compiler
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install any needed packages specified in requirements.txt
RUN pip install -r requirements.txt
RUN pip install --no-cache-dir -r requirements.txt \
    && DEBIAN_FRONTEND=noninteractive apt-get remove -y build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY . /app

@ -1,7 +1,6 @@

llama_hub==0.0.41
llama_index==0.10.55
llama_index==0.8.55
requests==2.32.3
Requests==2.31.0
weaviate_client==4.6.5
weaviate_client==3.25.1
transformers
torch
chainlit

@ -1,5 +1,5 @@

FROM python:3.10-bullseye
FROM python:3.12-slim-bullseye

COPY . /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./functions-openai.py" ];
ENTRYPOINT [ "python", "./functions-openai.py" ]

@ -1,2 +1,2 @@

langchain==0.1.0
langchain==0.2.8
openai==0.27.8
openai==1.35.13

@ -5,7 +5,7 @@ metadata:

spec:
  containers:
  - name: broken-pod
    image: nginx:1.a.b.c
    image: nginx:1.27.0
    livenessProbe:
      httpGet:
        path: /

@ -2,7 +2,7 @@ replicaCount: 1

deployment:
  # https://quay.io/repository/go-skynet/local-ai?tab=tags
  image: quay.io/go-skynet/local-ai:v1.23.0
  image: quay.io/go-skynet/local-ai:v1.40.0
  env:
    threads: 4
    debug: "true"

@ -93,4 +93,4 @@ nodeSelector: {}

tolerations: []

affinity: {}

@ -1,4 +1,4 @@

langchain==0.1.0
langchain==0.2.8
openai==0.27.6
openai==1.35.13
chromadb==0.3.21
chromadb==0.5.4
llama-index==0.9.36
llama-index==0.10.55

@ -1,6 +1,6 @@

FROM node:latest
FROM node:lts-alpine

COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]

@ -1,5 +1,5 @@

FROM python:3.10-bullseye
FROM python:3.12-bullseye

COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./full_demo.py" ];
ENTRYPOINT [ "python", "./full_demo.py" ]

@ -1,32 +1,33 @@

aiohttp==3.9.4
aiohttp==3.9.5
aiosignal==1.3.1
async-timeout==4.0.2
async-timeout==4.0.3
attrs==23.1.0
attrs==23.2.0
certifi==2023.7.22
certifi==2024.7.4
charset-normalizer==3.1.0
charset-normalizer==3.3.2
colorama==0.4.6
dataclasses-json==0.5.7
dataclasses-json==0.6.7
debugpy==1.6.7
debugpy==1.8.2
frozenlist==1.3.3
frozenlist==1.4.1
greenlet==2.0.2
greenlet==3.0.3
idna==3.7
langchain==0.1.0
langchain==0.2.8
marshmallow==3.19.0
langchain-community==0.2.7
marshmallow==3.21.3
marshmallow-enum==1.5.1
multidict==6.0.4
multidict==6.0.5
mypy-extensions==1.0.0
numexpr==2.8.4
numexpr==2.10.1
numpy==1.24.3
numpy==1.26.4
openai==0.27.6
openai==1.35.13
openapi-schema-pydantic==1.2.4
packaging==23.1
packaging>=23.2
pydantic==1.10.13
pydantic==2.8.2
PyYAML==6.0
PyYAML==6.0.1
requests==2.31.0
requests==2.32.3
SQLAlchemy==2.0.12
SQLAlchemy==2.0.30
tenacity==8.2.2
tenacity==8.5.0
tqdm==4.66.3
tqdm==4.66.4
typing-inspect==0.8.0
typing-inspect==0.9.0
typing_extensions==4.5.0
typing_extensions==4.12.2
urllib3==1.26.18
urllib3==2.2.2
yarl==1.9.2
yarl==1.9.4

@ -1,10 +1,10 @@

module semantic-todo

go 1.21.6
go 1.22

require (
	github.com/gdamore/tcell/v2 v2.7.1
	github.com/gdamore/tcell/v2 v2.7.4
	github.com/rivo/tview v0.0.0-20240307173318-e804876934a1
	github.com/rivo/tview v0.0.0-20240524063012-037df494fb76
)

require (