Merge branch 'master' into default_miro

Dave 2024-08-20 19:10:51 -04:00, committed by GitHub
commit 3eb1c1c689
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
197 changed files with 4171 additions and 1305 deletions


@ -0,0 +1,17 @@
#!/bin/bash
cd /workspace
# Get the files into the volume without a bind mount
if [ ! -d ".git" ]; then
git clone https://github.com/mudler/LocalAI.git .
else
git fetch
fi
echo "Standard Post-Create script completed."
if [ -f "/devcontainer-customization/postcreate.sh" ]; then
echo "Launching customization postcreate.sh"
bash "/devcontainer-customization/postcreate.sh"
fi


@ -0,0 +1,16 @@
#!/bin/bash
cd /workspace
# Grab the pre-stashed backend assets to avoid build issues
cp -r /build/backend-assets /workspace/backend-assets
# Ensures generated source files are present upon load
make prepare
echo "Standard Post-Start script completed."
if [ -f "/devcontainer-customization/poststart.sh" ]; then
echo "Launching customization poststart.sh"
bash "/devcontainer-customization/poststart.sh"
fi


@ -0,0 +1,49 @@
#!/bin/bash
# This file contains some really simple functions that are useful when building up customization scripts.
# Checks if the git config has a user registered - and sets it up if not.
#
# Param 1: name
# Param 2: email
#
config_user() {
local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then
echo "Setting up git user / remote"
git config --global user.name "$1"
git config --global user.email "$2"
fi
}
# Checks if the git remote is configured - and sets it up if not. Fetches either way.
#
# Param 1: remote name
# Param 2: remote url
#
config_remote() {
local gr=$(git remote -v | grep "$1")
if [ -z "${gr}" ]; then
git remote add "$1" "$2"
fi
git fetch "$1"
}
# Set up special .ssh files
#
# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
setup_ssh() {
local files=("$@")
for file in "${files[@]}"; do
local cfile="/devcontainer-customization/${file}"
local hfile="${HOME}/.ssh/${file}"
if [ ! -f "${hfile}" ]; then
echo "copying ${file}"
cp "${cfile}" "${hfile}"
chmod 600 "${hfile}"
fi
done
ls ~/.ssh
}


@ -0,0 +1,25 @@
Place any additional resources your environment requires in this directory.
Script hooks are currently called for:
`postcreate.sh` and `poststart.sh`
If files with those names exist here, they will be called at the end of the normal script.
This is a good place to set things like `git config --global user.name` - and to handle any other files that are mounted via this directory.
To assist in doing so, `source /.devcontainer-scripts/utils.sh` will provide utility functions that may be useful - for example:
```
#!/bin/bash
source "/.devcontainer-scripts/utils.sh"
sshfiles=("config" "key.pub")
setup_ssh "${sshfiles[@]}"
config_user "YOUR NAME" "YOUR EMAIL"
config_remote "REMOTE NAME" "REMOTE URL"
```


@ -0,0 +1,24 @@
{
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
"name": "LocalAI",
"workspaceFolder": "/workspace",
"dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
"service": "api",
"shutdownAction": "stopCompose",
"customizations": {
"vscode": {
"extensions": [
"golang.go",
"ms-vscode.makefile-tools",
"ms-azuretools.vscode-docker",
"ms-python.python",
"ms-python.debugpy",
"wayou.vscode-todo-highlight",
"waderyan.gitblame"
]
}
},
"forwardPorts": [8080, 3000],
"postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
"postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
}

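If you drive this configuration from the command line instead of VS Code, the Dev Containers CLI can bring up the same environment; a minimal sketch, assuming `@devcontainers/cli` is installed and run from the repository root:

```
# Build and start the dev container described by .devcontainer/devcontainer.json
devcontainer up --workspace-folder .
# Run a command inside the running container
devcontainer exec --workspace-folder . make prepare
```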

@ -0,0 +1,48 @@
services:
api:
build:
context: ..
dockerfile: Dockerfile
target: devcontainer
args:
- FFMPEG=true
- IMAGE_TYPE=extras
- GO_TAGS=stablediffusion p2p tts
env_file:
- ../.env
ports:
- 8080:8080
volumes:
- localai_workspace:/workspace
- ../models:/host-models
- ./customization:/devcontainer-customization
command: /bin/sh -c "while sleep 1000; do :; done"
cap_add:
- SYS_PTRACE
security_opt:
- seccomp:unconfined
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- prom_data:/prometheus
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
volumes:
prom_data:
localai_workspace:

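The compose file can also be exercised on its own, without the devcontainer tooling, which is handy when debugging the service definitions; a sketch, assuming it is run from `.devcontainer/` and that `../.env` exists:

```
docker compose -f docker-compose-devcontainer.yml up -d api prometheus grafana
docker compose -f docker-compose-devcontainer.yml logs -f api
```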

@ -0,0 +1,10 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true


@ -0,0 +1,21 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: prometheus
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
static_configs:
- targets:
- localhost:9090

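With the stack up, the scrape configuration can be sanity-checked through Prometheus' HTTP API; a sketch against the forwarded port, assuming `jq` is available:

```
# List active scrape targets and their health
curl -s http://localhost:9090/api/v1/targets \
  | jq '.data.activeTargets[] | {job: .labels.job, health: .health}'
```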

@ -1,6 +1,7 @@
.idea
.github
.vscode
.devcontainer
models
examples/chatbot-ui/models
examples/rwkv/models

.env

@ -79,6 +79,9 @@
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
# Enable to allow p2p mode
# LOCALAI_P2P=true
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too long

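The new `LOCALAI_P2P` flag can also be tried without editing `.env`; a minimal sketch, assuming a built `local-ai` binary:

```
# Equivalent to uncommenting LOCALAI_P2P=true above
LOCALAI_P2P=true ./local-ai run
```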
.github/bump_deps.sh

@ -6,4 +6,17 @@ VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
# Read $VAR from Makefile (only first match)
set +e
CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
set -e
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
if [ -z "$CURRENT_COMMIT" ]; then
echo "Could not find $VAR in Makefile."
exit 0
fi
echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"

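For context, the script takes a repository, a branch, and a Makefile variable name, and leaves the compare link and new commit hash in text files for the workflow to pick up; a hypothetical invocation (the arguments are illustrative):

```
bash .github/bump_deps.sh ggerganov/llama.cpp master CPPLLAMA_VERSION
cat CPPLLAMA_VERSION_message.txt   # compare link used for the PR body
cat CPPLLAMA_VERSION_commit.txt    # new pinned commit
```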

@ -67,10 +67,6 @@ updates:
directory: "/backend/python/parler-tts"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/petals"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/rerankers"
schedule:


@ -40,17 +40,30 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Bump dependencies 🔧
id: bump
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
{
echo 'message<<EOF'
cat "${{ matrix.variable }}_message.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
{
echo 'commit<<EOF'
cat "${{ matrix.variable }}_commit.txt"
echo EOF
} >> "$GITHUB_OUTPUT"
rm -rfv ${{ matrix.variable }}_message.txt
rm -rfv ${{ matrix.variable }}_commit.txt
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
body: ${{ steps.bump.outputs.message }}
signoff: true

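The heredoc-into-`$GITHUB_OUTPUT` blocks above are GitHub Actions' supported syntax for multiline step outputs; a local sketch of the same pattern:

```
GITHUB_OUTPUT=$(mktemp)   # Actions provides this path in real runs
{
  echo 'message<<EOF'
  printf 'line one\nline two\n'
  echo 'EOF'
} >> "$GITHUB_OUTPUT"
cat "$GITHUB_OUTPUT"
```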
.github/workflows/deploy-explorer.yaml

@ -0,0 +1,64 @@
name: Explorer deployment
on:
push:
branches:
- master
tags:
- 'v*'
concurrency:
group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }}
jobs:
build-linux:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-go@v5
with:
go-version: '1.21.x'
cache: false
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
make protogen-go
- name: Build api
run: |
CGO_ENABLED=0 make build-api
- name: rm
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo rm -rf local-ai/ || true
- name: copy file via ssh
uses: appleboy/scp-action@v0.1.7
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
source: "local-ai"
overwrite: true
rm: true
target: ./local-ai
- name: restarting
uses: appleboy/ssh-action@v1.0.3
with:
host: ${{ secrets.EXPLORER_SSH_HOST }}
username: ${{ secrets.EXPLORER_SSH_USERNAME }}
key: ${{ secrets.EXPLORER_SSH_KEY }}
port: ${{ secrets.EXPLORER_SSH_PORT }}
script: |
sudo cp -rfv local-ai/local-ai /usr/bin/local-ai
sudo systemctl restart local-ai


@ -168,32 +168,6 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
# tests-petals:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v4
# with:
# submodules: true
# - name: Dependencies
# run: |
# sudo apt-get update
# sudo apt-get install build-essential ffmpeg
# # Install UV
# curl -LsSf https://astral.sh/uv/install.sh | sh
# sudo apt-get install -y ca-certificates cmake curl patch python3-pip
# sudo apt-get install -y libopencv-dev
# pip install --user --no-cache-dir grpcio-tools==1.64.1
# - name: Test petals
# run: |
# make --jobs=5 --output-sync=target -C backend/python/petals
# make --jobs=5 --output-sync=target -C backend/python/petals test
# tests-bark:
# runs-on: ubuntu-latest
# steps:

.gitignore

@ -54,3 +54,6 @@ docs/static/gallery.html
# backend virtual environments
**/venv
# per-developer customization files for the development container
.devcontainer/customization/*

.vscode/launch.json

@ -3,12 +3,12 @@
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/examples/langchain-chroma",
"cwd": "${fileDirname}",
"env": {
"OPENAI_API_BASE": "http://localhost:8080/v1",
"OPENAI_API_KEY": "abc"
@ -19,15 +19,16 @@
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"program": "${workspaceRoot}",
"args": [],
"env": {
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"DEBUG": "true"
}
"LOCALAI_LOG_LEVEL": "debug",
"LOCALAI_P2P": "true",
"LOCALAI_FEDERATED": "true"
},
"buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
"envFile": "${workspaceFolder}/.env",
"cwd": "${workspaceRoot}"
}
]
}

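Outside the debugger, the Go launch configuration corresponds to building with the same tags by hand; a sketch, assuming generated sources are present (`make prepare`):

```
go build -tags "stablediffusion p2p tts" -v -o local-ai .
LOCALAI_LOG_LEVEL=debug LOCALAI_P2P=true LOCALAI_FEDERATED=true ./local-ai run
```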

@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core
USER root
ARG GO_VERSION=1.22.5
ARG GO_VERSION=1.22.6
ARG TARGETARCH
ARG TARGETVARIANT
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
RUN apt-get update && \
@ -30,7 +30,7 @@ RUN apt-get update && \
# Install Go
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \
COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# Cuda
ENV PATH /usr/local/cuda/bin:${PATH}
ENV PATH=/usr/local/cuda/bin:${PATH}
# HipBLAS requirements
ENV PATH /opt/rocm/bin:${PATH}
ENV PATH=/opt/rocm/bin:${PATH}
# OpenBLAS requirements and stable diffusion
RUN apt-get update && \
@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
WORKDIR /build
RUN test -n "$TARGETARCH" \
|| (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
###################################
###################################
@ -81,7 +81,7 @@ RUN apt-get update && \
espeak \
python3-pip \
python-is-python3 \
python3-dev \
python3-dev llvm \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder
# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer
FROM requirements-drivers AS builder-base
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS
ARG LD_FLAGS="-s -w"
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV LD_FLAGS=${LD_FLAGS}
RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH"
WORKDIR /build
COPY . .
COPY .git .
RUN echo "GO_TAGS: $GO_TAGS"
RUN make prepare
# We need protoc installed, and the version in 22.04 is too old. We will build one as part of the GRPC build below,
# but that will also bring in a newer version of absl, which stablediffusion cannot compile with. This version of protoc is only
@ -256,9 +255,30 @@ RUN <<EOT bash
fi
EOT
###################################
###################################
# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
FROM builder-base AS builder-sd
COPY . .
COPY .git .
RUN make prepare
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
###################################
###################################
# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
# Adjustments to the build process should likely be made here.
FROM builder-sd AS builder
# Install the pre-built GRPC
COPY --from=grpc /opt/grpc /usr/local
@ -276,6 +296,41 @@ RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
###################################
###################################
# The devcontainer target is not used on CI. It is a target for developers to use locally -
# rather than copying files it mounts them locally and leaves building to the developer
FROM builder-base AS devcontainer
ARG FFMPEG
COPY --from=grpc /opt/grpc /usr/local
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
COPY .devcontainer-scripts /.devcontainer-scripts
# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
; fi
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ssh less && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN go install github.com/go-delve/delve/cmd/dlv@latest
RUN go install github.com/mikefarah/yq/v4@latest
###################################
###################################
# This is the final target. The result of this target will be the image uploaded to the registry.
# If you cannot find a more suitable place for an addition, this layer is a suitable place for it.
FROM requirements-drivers
@ -326,7 +381,7 @@ COPY --from=builder /build/local-ai ./
COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
# Change the shell to bash so we can use [[ tests below
SHELL ["/bin/bash", "-c"]
@ -356,9 +411,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$I
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/petals \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/sentencetransformers \
; fi && \

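The new `devcontainer` stage can also be built directly, which is what the compose file's `target: devcontainer` does under the hood; a sketch from the repository root:

```
docker build --target devcontainer \
  --build-arg FFMPEG=true \
  --build-arg IMAGE_TYPE=extras \
  --build-arg GO_TAGS="stablediffusion p2p tts" \
  -t localai-devcontainer .
```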

@ -8,11 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=27a8b020c36b0df8f8b82a252d261cda47cf44b8
CPPLLAMA_VERSION?=2f3c1466ff46a2413b0e363a5005c46538186ee6
# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@ -20,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=f68298ce06ca3edd6e6f3f21c3d0bb5f073942c3
WHISPER_CPP_VERSION?=d65786ea540a5aef21f67cacfa6f134097727780
# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@ -190,7 +186,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
@ -253,18 +248,6 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
## GPT4ALL
sources/gpt4all:
mkdir -p sources/gpt4all
cd sources/gpt4all && \
git init && \
git remote add origin $(GPT4ALL_REPO) && \
git fetch origin && \
git checkout $(GPT4ALL_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch
sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a: sources/gpt4all
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## RWKV
sources/go-rwkv.cpp:
@ -318,7 +301,7 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
get-sources: sources/go-llama.cpp sources/gpt4all sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
@ -328,7 +311,6 @@ replace:
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
dropreplace:
@ -339,7 +321,6 @@ dropreplace:
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
$(GOCMD) mod edit -dropreplace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
prepare-sources: get-sources replace
@ -349,7 +330,6 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
@ -396,7 +376,7 @@ build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=p2p $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=p2p $(MAKE) build
backend-assets/lib:
mkdir -p backend-assets/lib
@ -407,7 +387,7 @@ ifeq ($(DETECT_LIBS),true)
scripts/prepare-libs.sh backend-assets/grpc/llama-cpp-avx2
endif
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
BUILD_TYPE=none $(MAKE) backend-assets/grpc/llama-cpp-fallback
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
@ -469,8 +449,7 @@ test: prepare test-models/testmodel.ggml grpcs
export GO_TAGS="tts stablediffusion debug"
$(MAKE) prepare-test
HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-gpt4all
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
$(MAKE) test-llama
$(MAKE) test-llama-gguf
$(MAKE) test-tts
@ -500,10 +479,6 @@ teardown-e2e:
rm -rf $(TEST_DIR) || true
docker stop $$(docker ps -q --filter ancestor=localai-tests)
test-gpt4all: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r $(TEST_PATHS)
test-llama: prepare-test
TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r $(TEST_PATHS)
@ -559,10 +534,10 @@ protogen-go-clean:
$(RM) bin/*
.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen
.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean
.PHONY: autogptq-protogen
autogptq-protogen:
@ -620,14 +595,6 @@ mamba-protogen:
mamba-protogen-clean:
$(MAKE) -C backend/python/mamba protogen-clean
.PHONY: petals-protogen
petals-protogen:
$(MAKE) -C backend/python/petals protogen
.PHONY: petals-protogen-clean
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen
@ -709,7 +676,6 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
prepare-test-extra: protogen-python
@ -730,12 +696,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
mkdir -p backend-assets/espeak-ng-data
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
backend-assets/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a
mkdir -p backend-assets/gpt4all
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
@cp sources/gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc
@ -746,13 +706,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif
backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/gpt4all
endif
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
@ -783,9 +736,6 @@ else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif
ifneq ($(UPX),)
$(UPX) backend/cpp/${VARIANT}/grpc-server
endif
# This target is for manually building a variant with-auto detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@ -858,9 +808,6 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
ifneq ($(UPX),)
$(UPX) backend-assets/util/llama-cpp-rpc-server
endif
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \

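The `builder-base`/`builder-sd` split in the Dockerfile mirrors what these Makefile targets allow by hand: building only the stablediffusion gRPC asset before everything else; a sketch:

```
# Build just the stablediffusion backend, as the builder-sd stage does
GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
# The API-only binary used by the explorer deployment (now with p2p tags)
CGO_ENABLED=0 make build-api
```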

@ -84,6 +84,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
Hot topics (looking for contributors):
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
- Improving UX v2: https://github.com/mudler/LocalAI/issues/1373
@ -150,6 +151,7 @@ Other:
## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)
- [Run Visual studio code with LocalAI (SUSE)](https://www.suse.com/c/running-ai-locally/)
- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)


@ -458,7 +458,9 @@ struct llama_server_context
}
}
std::tie(model, ctx) = llama_init_from_gpt_params(params);
llama_init_result llama_init = llama_init_from_gpt_params(params);
model = llama_init.model;
ctx = llama_init.context;
if (model == nullptr)
{
LOG_ERROR("unable to load model", {{"model", params.model}});
@ -478,7 +480,7 @@ struct llama_server_context
n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_should_add_bos_token(model);
add_bos_token = llama_add_bos_token(model);
return true;
}
@ -2258,7 +2260,7 @@ static void params_parse(const backend::ModelOptions* request,
}
// get the directory of modelfile
std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor });
}
params.use_mlock = request->mlock();
params.use_mmap = request->mmap();


@ -1,62 +0,0 @@
package main
// This is a wrapper to satisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"github.com/mudler/LocalAI/pkg/grpc/base"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
type LLM struct {
base.SingleThread
gpt4all *gpt4all.Model
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model
return err
}
func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(float64(opts.Temperature)),
gpt4all.SetTopP(float64(opts.TopP)),
gpt4all.SetTopK(int(opts.TopK)),
gpt4all.SetTokens(int(opts.Tokens)),
}
if opts.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch)))
}
return predictOptions
}
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
}
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
predictOptions := buildPredictOptions(opts)
go func() {
llm.gpt4all.SetTokenCallback(func(token string) bool {
results <- token
return true
})
_, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...)
if err != nil {
fmt.Println("err: ", err)
}
llm.gpt4all.SetTokenCallback(nil)
close(results)
}()
return nil
}


@ -1,21 +0,0 @@
package main
// Note: this is started internally by LocalAI and a server is allocated for each model
import (
"flag"
grpc "github.com/mudler/LocalAI/pkg/grpc"
)
var (
addr = flag.String("addr", "localhost:50051", "the address to connect to")
)
func main() {
flag.Parse()
if err := grpc.StartServer(*addr, &LLM{}); err != nil {
panic(err)
}
}


@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@ -0,0 +1 @@
torch


@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,7 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.65.1
grpcio==1.65.4
protobuf
torch
certifi
transformers


@ -0,0 +1,4 @@
transformers
accelerate
torch
torchaudio


@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate


@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate


@ -1,3 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
torchaudio
transformers
accelerate


@ -3,4 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate


@ -1,6 +1,4 @@
accelerate
bark==0.1.5
grpcio==1.65.1
grpcio==1.65.5
protobuf
certifi
transformers
certifi


@ -18,10 +18,23 @@
# source $(dirname $0)/../common/libbackend.sh
#
function init() {
# Name of the backend (directory name)
BACKEND_NAME=${PWD##*/}
# Path where all backends files are
MY_DIR=$(realpath `dirname $0`)
# Build type
BUILD_PROFILE=$(getBuildProfile)
# Environment directory
EDIR=${MY_DIR}
# Allow to specify a custom env dir for shared environments
if [ "x${ENV_DIR}" != "x" ]; then
EDIR=${ENV_DIR}
fi
# If a backend has defined a list of valid build profiles...
if [ ! -z "${LIMIT_TARGETS}" ]; then
isValidTarget=$(checkTargets ${LIMIT_TARGETS})
@ -74,13 +87,14 @@ function getBuildProfile() {
# This function is idempotent, so you can call it as many times as you want and it will
# always result in an activated virtual environment
function ensureVenv() {
if [ ! -d "${MY_DIR}/venv" ]; then
uv venv ${MY_DIR}/venv
if [ ! -d "${EDIR}/venv" ]; then
uv venv ${EDIR}/venv
echo "virtualenv created"
fi
if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then
source ${MY_DIR}/venv/bin/activate
# Source if we are not already in a Virtual env
if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then
source ${EDIR}/venv/bin/activate
echo "virtualenv activated"
fi
@ -113,13 +127,24 @@ function installRequirements() {
# These are the requirements files we will attempt to install, in order
declare -a requirementFiles=(
"${MY_DIR}/requirements-install.txt"
"${MY_DIR}/requirements.txt"
"${MY_DIR}/requirements-${BUILD_TYPE}.txt"
"${EDIR}/requirements-install.txt"
"${EDIR}/requirements.txt"
"${EDIR}/requirements-${BUILD_TYPE}.txt"
)
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt")
fi
# if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
if [ "x${BUILD_TYPE}" == "x" ]; then
requirementFiles+=("${EDIR}/requirements-cpu.txt")
fi
requirementFiles+=("${EDIR}/requirements-after.txt")
if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then
requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt")
fi
for reqFile in ${requirementFiles[@]}; do

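The new `EDIR`/`ENV_DIR` indirection lets several backends share a single virtual environment; a sketch of a backend `run.sh` opting in (the environment path is illustrative):

```
#!/bin/bash
# Point the common library at a shared environment directory
ENV_DIR=/build/backend/python/common-env
source $(dirname $0)/../common/libbackend.sh
startBackend $@
```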

@ -1,2 +1,2 @@
grpcio==1.65.1
grpcio==1.65.5
protobuf


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate


@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate


@ -1,3 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
torchaudio
transformers
accelerate


@ -3,4 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate


@ -1,6 +1,4 @@
accelerate
TTS==0.22.0
grpcio==1.65.1
grpcio==1.65.5
protobuf
certifi
transformers
certifi


@ -18,13 +18,13 @@ import backend_pb2_grpc
import grpc
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \
EulerAncestralDiscreteScheduler
EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
from compel import Compel, ReturnedEmbeddingsType
from transformers import CLIPTextModel
from optimum.quanto import freeze, qfloat8, quantize
from transformers import CLIPTextModel, T5EncoderModel
from safetensors.torch import load_file
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@ -163,6 +163,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
modelFile = request.Model
self.cfg_scale = 7
self.PipelineType = request.PipelineType
if request.CFGScale != 0:
self.cfg_scale = request.CFGScale
@ -244,6 +246,30 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
torch_dtype=torchType,
use_safetensors=True,
variant=variant)
elif request.PipelineType == "FluxPipeline":
self.pipe = FluxPipeline.from_pretrained(
request.Model,
torch_dtype=torch.bfloat16)
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
elif request.PipelineType == "FluxTransformer2DModel":
dtype = torch.bfloat16
# specify from environment or default to "ChuckMcSneed/FLUX.1-dev"
bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev")
transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype)
quantize(transformer, weights=qfloat8)
freeze(transformer)
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)
self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype)
self.pipe.transformer = transformer
self.pipe.text_encoder_2 = text_encoder_2
if request.LowVRAM:
self.pipe.enable_model_cpu_offload()
if CLIPSKIP and request.CLIPSkip != 0:
self.clip_skip = request.CLIPSkip
@ -399,6 +425,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
request.seed
)
if self.PipelineType == "FluxPipeline":
kwargs["max_sequence_length"] = 256
if self.PipelineType == "FluxTransformer2DModel":
kwargs["output_type"] = "pil"
kwargs["generator"] = torch.Generator("cpu").manual_seed(0)
if self.img2vid:
# Load the conditioning image
image = load_image(request.src)

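Once a model is configured with one of the new Flux pipeline types, generation goes through the usual OpenAI-compatible endpoint; a hedged sketch (model name and port are assumptions):

```
curl http://localhost:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"model": "flux.1-dev", "prompt": "a watercolor fox", "size": "1024x1024"}'
```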

@ -0,0 +1,9 @@
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
torch
optimum-quanto


@ -0,0 +1,10 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto


@ -0,0 +1,9 @@
torch
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto


@ -1,3 +1,11 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchvision
torch==2.3.1+rocm6.0
torchvision==0.18.1+rocm6.0
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto


@ -3,4 +3,12 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers
opencv-python
transformers
accelerate
compel
peft
sentencepiece
optimum-quanto


@ -1,13 +1,5 @@
setuptools
accelerate
compel
peft
diffusers
grpcio==1.65.1
opencv-python
grpcio==1.65.4
pillow
protobuf
sentencepiece
torch
transformers
certifi


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate


@ -0,0 +1,3 @@
torch
transformers
accelerate


@ -1,6 +1,4 @@
grpcio==1.65.0
grpcio==1.65.5
protobuf
torch
transformers
certifi
setuptools


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers
accelerate


@ -0,0 +1,3 @@
torch
transformers
accelerate


@ -1,7 +1,5 @@
accelerate
grpcio==1.65.1
grpcio==1.65.4
protobuf
certifi
torch
wheel
setuptools


@ -0,0 +1,2 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2


@ -0,0 +1,2 @@
torch
transformers


@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
transformers


@ -0,0 +1,2 @@
torch
transformers


@ -3,5 +3,4 @@
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel
torch==2.3.1
wheel


@ -1,6 +1,3 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2
grpcio==1.65.1
grpcio==1.65.5
protobuf
certifi
transformers
certifi


@ -0,0 +1 @@
torch


@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch


@ -0,0 +1 @@
torch


@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.65.1
grpcio==1.65.5
protobuf
librosa==0.9.1
faster-whisper==1.0.3


@ -1,4 +1,4 @@
grpcio==1.65.1
grpcio==1.65.5
protobuf
librosa
faster-whisper


@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh
# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
unzip checkpoints_v2.zip
fi


@ -0,0 +1 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
torchaudio
transformers
accelerate


@ -0,0 +1,4 @@
torch
torchaudio
transformers
accelerate


@ -1,3 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
torchaudio
transformers
accelerate


@ -3,4 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate


@ -1,7 +1,4 @@
accelerate
grpcio==1.65.1
grpcio==1.65.5
protobuf
torch
git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
certifi
transformers
llvmlite==0.43.0


@ -1,31 +0,0 @@
.PHONY: petals
petals: protogen
@echo "Creating virtual environment..."
bash install.sh "petals.yml"
@echo "Virtual environment created."
.PHONY: run
run: protogen
@echo "Running petals..."
bash run.sh
@echo "petals run."
.PHONY: test
test: protogen
@echo "Testing petals..."
bash test.sh
@echo "petals tested."
.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py
.PHONY: protogen-clean
protogen-clean:
$(RM) backend_pb2_grpc.py backend_pb2.py
backend_pb2_grpc.py backend_pb2.py:
python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
.PHONY: clean
clean: protogen-clean
rm -rf venv __pycache__


@ -1,140 +0,0 @@
#!/usr/bin/env python3
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc
import grpc
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
A gRPC servicer that implements the Backend service defined in backend.proto.
"""
def Health(self, request, context):
"""
Returns a health check message.
Args:
request: The health check request.
context: The gRPC context.
Returns:
backend_pb2.Reply: The health check reply.
"""
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
"""
Loads a language model.
Args:
request: The load model request.
context: The gRPC context.
Returns:
backend_pb2.Result: The load model result.
"""
try:
self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False)
self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model)
self.cuda = False
if request.CUDA:
self.model = self.model.cuda()
self.cuda = True
except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
return backend_pb2.Result(message="Model loaded successfully", success=True)
def Predict(self, request, context):
"""
Generates text based on the given prompt and sampling parameters.
Args:
request: The predict request.
context: The gRPC context.
Returns:
backend_pb2.Result: The predict result.
"""
inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"]
if self.cuda:
inputs = inputs.cuda()
if request.Tokens == 0:
# Max to max value if tokens are not specified
request.Tokens = 8192
# TODO: kwargs and map all parameters
outputs = self.model.generate(inputs, max_new_tokens=request.Tokens)
generated_text = self.tokenizer.decode(outputs[0])
# Remove prompt from response if present
if request.Prompt in generated_text:
generated_text = generated_text.replace(request.Prompt, "")
return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8'))
def PredictStream(self, request, context):
"""
Generates text based on the given prompt and sampling parameters, and streams the results.
Args:
request: The predict stream request.
context: The gRPC context.
Returns:
backend_pb2.Result: The predict stream result.
"""
# Implement PredictStream RPC
#for reply in some_data_generator():
# yield reply
# Not implemented yet
return self.Predict(request, context)
def serve(address):
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
server.add_insecure_port(address)
server.start()
print("Server started. Listening on: " + address, file=sys.stderr)
# Define the signal handler function
def signal_handler(sig, frame):
print("Received termination signal. Shutting down...")
server.stop(0)
sys.exit(0)
# Set the signal handlers for SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
try:
while True:
time.sleep(_ONE_DAY_IN_SECONDS)
except KeyboardInterrupt:
server.stop(0)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run the gRPC server.")
parser.add_argument(
"--addr", default="localhost:50051", help="The address to bind the server to."
)
args = parser.parse_args()
serve(args.addr)


@ -1,14 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi
installRequirements


@ -1,2 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch


@ -1,5 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,3 +0,0 @@
git+https://github.com/bigscience-workshop/petals
certifi
transformers


@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh
startBackend $@


@ -1,58 +0,0 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc
import grpc
import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2
class TestBackendServicer(unittest.TestCase):
"""
TestBackendServicer is the class that tests the gRPC service.
This class contains methods to test the startup and shutdown of the gRPC service.
"""
def setUp(self):
self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
time.sleep(10)
def tearDown(self) -> None:
self.service.terminate()
self.service.wait()
def test_server_startup(self):
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.Health(backend_pb2.HealthMessage())
self.assertEqual(response.message, b'OK')
except Exception as err:
print(err)
self.fail("Server failed to start")
finally:
self.tearDown()
def test_load_model(self):
"""
This method tests if the model is loaded successfully
"""
try:
self.setUp()
with grpc.insecure_channel("localhost:50051") as channel:
stub = backend_pb2_grpc.BackendStub(channel)
response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m"))
print(response)
self.assertTrue(response.success)
self.assertEqual(response.message, "Model loaded successfully")
except Exception as err:
print(err)
self.fail("LoadModel service failed")
finally:
self.tearDown()

View File

@ -1,6 +0,0 @@
#!/bin/bash
set -e
source $(dirname $0)/../common/libbackend.sh
runUnittests


@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]


@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch
rerankers[transformers]


@ -0,0 +1,4 @@
transformers
accelerate
torch
rerankers[transformers]


@ -1,2 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
transformers
accelerate
torch
rerankers[transformers]


@ -1,5 +1,8 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
transformers
accelerate
torch
rerankers[transformers]
optimum[openvino]
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406


@ -1,6 +1,3 @@
accelerate
rerankers[transformers]
grpcio==1.65.1
grpcio==1.65.4
protobuf
certifi
transformers
certifi


@ -0,0 +1,6 @@
torch
accelerate
transformers
bitsandbytes
sentence-transformers==3.0.1
transformers


@ -0,0 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch
accelerate
sentence-transformers==3.0.1
transformers


@ -0,0 +1,4 @@
torch
accelerate
sentence-transformers==3.0.1
transformers


@ -1,2 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torch
accelerate
sentence-transformers==3.0.1
transformers


@ -2,4 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate
sentence-transformers==3.0.1
transformers


@ -1,6 +1,3 @@
accelerate
sentence-transformers==3.0.1
transformers
grpcio==1.65.1
grpcio==1.65.5
protobuf
certifi


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -0,0 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu118
transformers
accelerate
torch


@ -0,0 +1,3 @@
transformers
accelerate
torch


@ -1,2 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
transformers
accelerate
torch

Some files were not shown because too many files have changed in this diff.