mirror of https://github.com/mudler/LocalAI.git (synced 2025-02-19 00:50:23 +00:00)

Merge branch 'master' into ci/public-runner

Some checks failed: Security Scan / tests (push) has been cancelled

This commit is contained in: commit 4c145b037b
@@ -7,7 +7,7 @@ services:
     args:
       - FFMPEG=true
       - IMAGE_TYPE=extras
-      - GO_TAGS=stablediffusion p2p tts
+      - GO_TAGS=p2p tts
     env_file:
       - ../.env
     ports:
 .env | 6
@@ -38,12 +38,12 @@
 ## Uncomment and set to true to enable rebuilding from source
 # REBUILD=true
 
-## Enable go tags, available: stablediffusion, tts
-## stablediffusion: image generation with stablediffusion
+## Enable go tags, available: p2p, tts
+## p2p: enable distributed inferencing
 ## tts: enables text-to-speech with go-piper
 ## (requires REBUILD=true)
 #
-# GO_TAGS=stablediffusion
+# GO_TAGS=p2p
 
 ## Path where to store generated images
 # LOCALAI_IMAGE_PATH=/tmp/generated/images
 .github/workflows/release.yaml (vendored) | 35
@@ -237,40 +237,7 @@ jobs:
           detached: true
           connect-timeout-seconds: 180
           limit-access-to-actor: true
-  build-stablediffusion:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - uses: actions/setup-go@v5
-        with:
-          go-version: '1.21.x'
-          cache: false
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
-      - name: Build stablediffusion
-        run: |
-          export PATH=$PATH:$GOPATH/bin
-          make backend-assets/grpc/stablediffusion
-          mkdir -p release && cp backend-assets/grpc/stablediffusion release
-        env:
-          GO_TAGS: stablediffusion
-      - uses: actions/upload-artifact@v4
-        with:
-          name: stablediffusion
-          path: release/
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        with:
-          files: |
-            release/*
-
-
   build-macOS-x86_64:
     runs-on: macos-13
 .github/workflows/test-extra.yml (vendored) | 51
@@ -78,57 +78,6 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/diffusers
           make --jobs=5 --output-sync=target -C backend/python/diffusers test
 
-  tests-parler-tts:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test parler-tts
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts
-          make --jobs=5 --output-sync=target -C backend/python/parler-tts test
-      - name: Setup tmate session if tests fail
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3.19
-        with:
-          detached: true
-          connect-timeout-seconds: 180
-          limit-access-to-actor: true
-
   tests-openvoice:
     runs-on: ubuntu-latest
     steps:
       - name: Clone
         uses: actions/checkout@v4
         with:
           submodules: true
       - name: Dependencies
         run: |
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
           # Install UV
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
           pip install --user --no-cache-dir grpcio-tools==1.64.1
 
       - name: Test openvoice
         run: |
           make --jobs=5 --output-sync=target -C backend/python/openvoice
           make --jobs=5 --output-sync=target -C backend/python/openvoice test
 
 # tests-transformers-musicgen:
 #    runs-on: ubuntu-latest
 #    steps:
 .github/workflows/test.yml (vendored) | 6
@@ -105,9 +105,7 @@
           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \
           GO_TAGS="tts" make -C sources/go-piper piper.o && \
-          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
-          # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
+          sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/
         env:
           CUDA_VERSION: 12-4
       - name: Cache grpc
@@ -129,7 +127,7 @@
           cd grpc && cd cmake/build && sudo make --jobs 5 install
       - name: Test
         run: |
-          PATH="$PATH:/root/go/bin" GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
+          PATH="$PATH:/root/go/bin" GO_TAGS="tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3.19
 .vscode/launch.json (vendored) | 2
@@ -26,7 +26,7 @@
                 "LOCALAI_P2P": "true",
                 "LOCALAI_FEDERATED": "true"
             },
-            "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"],
+            "buildFlags": ["-tags", "p2p tts", "-v"],
             "envFile": "${workspaceFolder}/.env",
             "cwd": "${workspaceRoot}"
         }
 Dockerfile | 51
@@ -15,8 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
-
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -69,14 +68,10 @@ ENV PATH=/opt/rocm/bin:${PATH}
 # OpenBLAS requirements and stable diffusion
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        libopenblas-dev \
-        libopencv-dev && \
+        libopenblas-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Set up OpenCV
-RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-
 WORKDIR /build
 
###################################
@@ -251,7 +246,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 
 FROM requirements-drivers AS builder-base
 
-ARG GO_TAGS="stablediffusion tts p2p"
+ARG GO_TAGS="tts p2p"
 ARG GRPC_BACKENDS
 ARG MAKEFLAGS
 ARG LD_FLAGS="-s -w"
@@ -285,35 +280,12 @@ RUN <<EOT bash
     fi
 EOT
 
-
-###################################
-###################################
-
-# This first portion of builder holds the layers specifically used to build backend-assets/grpc/stablediffusion
-# In most cases, builder is the image you should be using - however, this can save build time if one just needs to copy backend-assets/grpc/stablediffusion and nothing else.
-FROM builder-base AS builder-sd
-
-# stablediffusion does not tolerate a newer version of abseil, copy only over enough elements to build it
-COPY Makefile .
-COPY go.mod .
-COPY go.sum .
-COPY backend/backend.proto ./backend/backend.proto
-COPY backend/go/image/stablediffusion ./backend/go/image/stablediffusion
-COPY pkg/grpc ./pkg/grpc
-COPY pkg/stablediffusion ./pkg/stablediffusion
-RUN git init
-RUN make sources/go-stable-diffusion
-RUN touch prepare-sources
-
-# Actually build the backend
-RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make backend-assets/grpc/stablediffusion
 
 ###################################
 ###################################
 
 # The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry.
 # Adjustments to the build process should likely be made here.
-FROM builder-sd AS builder
+FROM builder-base AS builder
 
 # Install the pre-built GRPC
 COPY --from=grpc /opt/grpc /usr/local
@@ -353,8 +325,6 @@ ARG FFMPEG
 
 COPY --from=grpc /opt/grpc /usr/local
 
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion /build/backend-assets/grpc/stablediffusion
-
 COPY .devcontainer-scripts /.devcontainer-scripts
 
 # Add FFmpeg
@@ -427,9 +397,6 @@ COPY --from=builder /build/local-ai ./
 # Copy shared libraries for piper
 COPY --from=builder /build/sources/go-piper/piper-phonemize/pi/lib/* /usr/lib/
 
-# do not let stablediffusion rebuild (requires an older version of absl)
-COPY --from=builder-sd /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
-
 # Change the shell to bash so we can use [[ tests below
 SHELL ["/bin/bash", "-c"]
 # We try to strike a balance between individual layer size (as that affects total push time) and total image size
@@ -443,8 +410,8 @@ RUN if [[ ( "${IMAGE_TYPE}" == "extras ")]]; then \
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/coqui \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "parler-tts" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/parler-tts \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "faster-whisper" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
+        make -C backend/python/faster-whisper \
     ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/diffusers \
@@ -453,9 +420,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/kokoro \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/openvoice \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/exllama2 \
     ; fi && \
@@ -474,9 +438,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE
     ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/rerankers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/mamba \
     ; fi
 
 # Make sure the models directory exists
 Makefile | 80
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=3edfa7d3753c29e44b964c0ff424d2ea8d5fdee6
+CPPLLAMA_VERSION?=6152129d05870cb38162c422c6ba80434e021e9f
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -18,17 +18,13 @@ WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d
 PIPER_REPO?=https://github.com/mudler/go-piper
 PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
 
-# stablediffusion version
-STABLEDIFFUSION_REPO?=https://github.com/mudler/go-stable-diffusion
-STABLEDIFFUSION_VERSION?=4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f
-
 # bark.cpp
 BARKCPP_REPO?=https://github.com/PABannier/bark.cpp.git
 BARKCPP_VERSION?=v1.0.0
 
 # stablediffusion.cpp (ggml)
 STABLEDIFFUSION_GGML_REPO?=https://github.com/leejet/stable-diffusion.cpp
-STABLEDIFFUSION_GGML_VERSION?=dcf91f9e0f2cbf9da472ee2a556751ed4bab2d2a
+STABLEDIFFUSION_GGML_VERSION?=5eb15ef4d022bef4a391de4f5f6556e81fbb5024
 
 ONNX_VERSION?=1.20.0
 ONNX_ARCH?=x64
@@ -179,11 +175,6 @@ ifeq ($(STATIC),true)
 	LD_FLAGS+=-linkmode external -extldflags -static
 endif
 
-ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
-#	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
-	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
-endif
-
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
@@ -273,19 +264,6 @@ sources/go-piper:
 sources/go-piper/libpiper_binding.a: sources/go-piper
 	$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o
 
-## stable diffusion (onnx)
-sources/go-stable-diffusion:
-	mkdir -p sources/go-stable-diffusion
-	cd sources/go-stable-diffusion && \
-	git init && \
-	git remote add origin $(STABLEDIFFUSION_REPO) && \
-	git fetch origin && \
-	git checkout $(STABLEDIFFUSION_VERSION) && \
-	git submodule update --init --recursive --depth 1 --single-branch
-
-sources/go-stable-diffusion/libstablediffusion.a: sources/go-stable-diffusion
-	CPATH="$(CPATH):/usr/include/opencv4" $(MAKE) -C sources/go-stable-diffusion libstablediffusion.a
-
 ## stablediffusion (ggml)
 sources/stablediffusion-ggml.cpp:
 	git clone --recursive $(STABLEDIFFUSION_GGML_REPO) sources/stablediffusion-ggml.cpp && \
@@ -331,20 +309,18 @@ sources/whisper.cpp:
 sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
 	cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a
 
-get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp sources/go-stable-diffusion backend/cpp/llama/llama.cpp
+get-sources: sources/go-llama.cpp sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
 
 replace:
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
-	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp
 
 dropreplace:
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
 	$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
 	$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
-	$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
 	$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-llama.cpp
 
 prepare-sources: get-sources replace
@@ -355,7 +331,6 @@ rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C sources/go-llama.cpp clean
 	$(MAKE) -C sources/whisper.cpp clean
-	$(MAKE) -C sources/go-stable-diffusion clean
 	$(MAKE) -C sources/go-piper clean
 	$(MAKE) build
 
@@ -470,7 +445,7 @@ prepare-test: grpcs
 
 test: prepare test-models/testmodel.ggml grpcs
 	@echo 'Running tests'
-	export GO_TAGS="tts stablediffusion debug"
+	export GO_TAGS="tts debug"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
@@ -558,10 +533,10 @@ protogen-go-clean:
 	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -595,6 +570,14 @@ diffusers-protogen:
 diffusers-protogen-clean:
 	$(MAKE) -C backend/python/diffusers protogen-clean
 
+.PHONY: faster-whisper-protogen
+faster-whisper-protogen:
+	$(MAKE) -C backend/python/faster-whisper protogen
+
+.PHONY: faster-whisper-protogen-clean
+faster-whisper-protogen-clean:
+	$(MAKE) -C backend/python/faster-whisper protogen-clean
+
 .PHONY: exllama2-protogen
 exllama2-protogen:
 	$(MAKE) -C backend/python/exllama2 protogen
@@ -603,14 +586,6 @@ exllama2-protogen:
 exllama2-protogen-clean:
 	$(MAKE) -C backend/python/exllama2 protogen-clean
 
-.PHONY: mamba-protogen
-mamba-protogen:
-	$(MAKE) -C backend/python/mamba protogen
-
-.PHONY: mamba-protogen-clean
-mamba-protogen-clean:
-	$(MAKE) -C backend/python/mamba protogen-clean
-
 .PHONY: rerankers-protogen
 rerankers-protogen:
 	$(MAKE) -C backend/python/rerankers protogen
@@ -627,14 +602,6 @@ transformers-protogen:
 transformers-protogen-clean:
 	$(MAKE) -C backend/python/transformers protogen-clean
 
-.PHONY: parler-tts-protogen
-parler-tts-protogen:
-	$(MAKE) -C backend/python/parler-tts protogen
-
-.PHONY: parler-tts-protogen-clean
-parler-tts-protogen-clean:
-	$(MAKE) -C backend/python/parler-tts protogen-clean
-
 .PHONY: kokoro-protogen
 kokoro-protogen:
 	$(MAKE) -C backend/python/kokoro protogen
@@ -643,14 +610,6 @@ kokoro-protogen:
 kokoro-protogen-clean:
 	$(MAKE) -C backend/python/kokoro protogen-clean
 
-.PHONY: openvoice-protogen
-openvoice-protogen:
-	$(MAKE) -C backend/python/openvoice protogen
-
-.PHONY: openvoice-protogen-clean
-openvoice-protogen-clean:
-	$(MAKE) -C backend/python/openvoice protogen-clean
-
 .PHONY: vllm-protogen
 vllm-protogen:
 	$(MAKE) -C backend/python/vllm protogen
@@ -666,13 +625,11 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/bark
 	$(MAKE) -C backend/python/coqui
 	$(MAKE) -C backend/python/diffusers
+	$(MAKE) -C backend/python/faster-whisper
 	$(MAKE) -C backend/python/vllm
-	$(MAKE) -C backend/python/mamba
 	$(MAKE) -C backend/python/rerankers
 	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/parler-tts
 	$(MAKE) -C backend/python/kokoro
-	$(MAKE) -C backend/python/openvoice
 	$(MAKE) -C backend/python/exllama2
 
 prepare-test-extra: protogen-python
@@ -816,13 +773,6 @@ ifneq ($(UPX),)
 	$(UPX) backend-assets/grpc/piper
 endif
 
-backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
-ifneq ($(UPX),)
-	$(UPX) backend-assets/grpc/stablediffusion
-endif
-
 backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
@@ -39,7 +39,7 @@
 </p>
 
 <p align="center">
-<a href="https://trendshift.io/repositories/1484" target="_blank"><img src="https://trendshift.io/api/badge/repositories/1484" alt="go-skynet%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<a href="https://trendshift.io/repositories/5539" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5539" alt="mudler%2FLocalAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
 </p>
 
 > :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
@@ -1,56 +1,17 @@
 name: stablediffusion
-backend: stablediffusion
+backend: stablediffusion-ggml
+cfg_scale: 4.5
+
+options:
+- sampler:euler
 parameters:
-  model: stablediffusion_assets
-
-license: "BSD-3"
-urls:
-- https://github.com/EdVince/Stable-Diffusion-NCNN
-- https://github.com/EdVince/Stable-Diffusion-NCNN/blob/main/LICENSE
-
-description: |
-  Stable Diffusion in NCNN with c++, supported txt2img and img2img
+  model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
+step: 25
 
 download_files:
-- filename: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
-  sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
-- filename: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
-  sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
-- filename: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
-  sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
-- filename: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
-  sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-encoder-512-512-fp16.bin"
-- filename: "stablediffusion_assets/AutoencoderKL-fp16.bin"
-  sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/AutoencoderKL-fp16.bin"
-- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
-  sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/FrozenCLIPEmbedder-fp16.bin"
-- filename: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
-  sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
-- filename: "stablediffusion_assets/log_sigmas.bin"
-  sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
-- filename: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
-  sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
-- filename: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
-  sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
-- filename: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
-  sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
-- filename: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
-  sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
-  uri: "https://github.com/EdVince/Stable-Diffusion-NCNN/releases/download/naifu/UNetModel-MHA-fp16.bin"
-- filename: "stablediffusion_assets/vocab.txt"
-  sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
-  uri: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
+- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
+  sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
+  uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
 
 usage: |
     curl http://localhost:8080/v1/images/generations \
@@ -1,21 +0,0 @@
package main

// Note: this is started internally by LocalAI and a server is allocated for each model

import (
	"flag"

	grpc "github.com/mudler/LocalAI/pkg/grpc"
)

var (
	addr = flag.String("addr", "localhost:50051", "the address to connect to")
)

func main() {
	flag.Parse()

	if err := grpc.StartServer(*addr, &Image{}); err != nil {
		panic(err)
	}
}
@@ -1,33 +0,0 @@
package main

// This is a wrapper to statisfy the GRPC service interface
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
	"github.com/mudler/LocalAI/pkg/grpc/base"
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"

	"github.com/mudler/LocalAI/pkg/stablediffusion"
)

type Image struct {
	base.SingleThread
	stablediffusion *stablediffusion.StableDiffusion
}

func (image *Image) Load(opts *pb.ModelOptions) error {
	var err error
	// Note: the Model here is a path to a directory containing the model files
	image.stablediffusion, err = stablediffusion.New(opts.ModelFile)
	return err
}

func (image *Image) GenerateImage(opts *pb.GenerateImageRequest) error {
	return image.stablediffusion.GenerateImage(
		int(opts.Height),
		int(opts.Width),
		int(opts.Mode),
		int(opts.Step),
		int(opts.Seed),
		opts.PositivePrompt,
		opts.NegativePrompt,
		opts.Dst)
}
@@ -311,12 +311,16 @@ func (s *Store) StoresGet(opts *pb.StoresGetOptions) (pb.StoresGetResult, error)
 }
 
 func isNormalized(k []float32) bool {
-	var sum float32
+	var sum float64
 
 	for _, v := range k {
-		sum += v
+		v64 := float64(v)
+		sum += v64 * v64
 	}
 
-	return sum == 1.0
+	s := math.Sqrt(sum)
+
+	return s >= 0.99 && s <= 1.01
 }
 
 // TODO: This we could replace with handwritten SIMD code
@@ -328,7 +332,7 @@ func normalizedCosineSimilarity(k1, k2 []float32) float32 {
 		dot += k1[i] * k2[i]
 	}
 
-	assert(dot >= -1 && dot <= 1, fmt.Sprintf("dot = %f", dot))
+	assert(dot >= -1.01 && dot <= 1.01, fmt.Sprintf("dot = %f", dot))
 
 	// 2.0 * (1.0 - dot) would be the Euclidean distance
 	return dot
@@ -418,7 +422,7 @@ func cosineSimilarity(k1, k2 []float32, mag1 float64) float32 {
 
 	sim := float32(dot / (mag1 * math.Sqrt(mag2)))
 
-	assert(sim >= -1 && sim <= 1, fmt.Sprintf("sim = %f", sim))
+	assert(sim >= -1.01 && sim <= 1.01, fmt.Sprintf("sim = %f", sim))
 
 	return sim
}
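The store change above replaces an exact float32 equality test with a float64 accumulation of squared components and a ±0.01 tolerance band around an L2 norm of 1.0: float32 rounding makes comparisons against exactly 1.0 unreliable for vectors normalized elsewhere. A standalone Go sketch of the same check (illustrative only, not the store package itself):

```go
package main

import (
	"fmt"
	"math"
)

// isNormalized reports whether v has an L2 norm of roughly 1.0.
// Accumulating squares in float64 avoids the float32 rounding drift
// that made the old exact `sum == 1.0` comparison fail in practice.
func isNormalized(v []float32) bool {
	var sum float64
	for _, x := range v {
		x64 := float64(x)
		sum += x64 * x64
	}
	norm := math.Sqrt(sum)
	return norm >= 0.99 && norm <= 1.01
}

func main() {
	// A unit vector normalized in float32 rarely sums to exactly 1.0,
	// but its norm still lands inside the tolerance band.
	fmt.Println(isNormalized([]float32{0.6, 0.8, 0.0})) // true
	fmt.Println(isNormalized([]float32{1, 1}))          // false: norm is about 1.414
}
```

The same reasoning explains loosening the `assert` bounds on the dot product and similarity to ±1.01: cosine similarity is mathematically confined to [-1, 1], but accumulated float32 rounding can push the computed value slightly past either end.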
@@ -1,8 +1,9 @@
 .DEFAULT_GOAL := install
 
 .PHONY: install
-install: protogen
+install:
 	bash install.sh
+	$(MAKE) protogen
 
 .PHONY: protogen
 protogen: backend_pb2_grpc.py backend_pb2.py
@@ -12,14 +13,8 @@ protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py
 
 backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+	bash protogen.sh
 
 .PHONY: clean
 clean: protogen-clean
 	rm -rf venv __pycache__
-
-.PHONY: test
-test: protogen
-	@echo "Testing openvoice..."
-	bash test.sh
-	@echo "openvoice tested."
-	rm -rf venv __pycache__
 backend/python/faster-whisper/backend.py (new executable file) | 94
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""
This is an extra gRPC server of LocalAI for the faster-whisper transcription backend
"""
from concurrent import futures
import time
import argparse
import signal
import sys
import os
import backend_pb2
import backend_pb2_grpc

from faster_whisper import WhisperModel

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    BackendServicer is the class that implements the gRPC service
    """
    def Health(self, request, context):
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        device = "cpu"
        # Get device
        # device = "cuda" if request.CUDA else "cpu"
        if request.CUDA:
            device = "cuda"

        try:
            print("Preparing models, please wait", file=sys.stderr)
            self.model = WhisperModel(request.Model, device=device, compute_type="float16")
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        # Implement your logic here for the LoadModel service
        # Replace this with your desired response
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def AudioTranscription(self, request, context):
        resultSegments = []
        text = ""
        try:
            segments, info = self.model.transcribe(request.dst, beam_size=5, condition_on_previous_text=False)
            id = 0
            for segment in segments:
                print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
                resultSegments.append(backend_pb2.TranscriptSegment(id=id, start=segment.start, end=segment.end, text=segment.text))
                text += segment.text
                id += 1
        except Exception as err:
            print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)

        return backend_pb2.TranscriptResult(segments=resultSegments, text=text)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
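The servicer above exposes the same "Backend" proto service that LocalAI's Go core drives for every external backend. A minimal Go health-check sketch against a backend started as `python backend.py --addr localhost:50051`, assuming the generated stubs in pkg/grpc/proto expose NewBackendClient, HealthMessage, and a Reply with a Message field (names inferred from the Python servicer, not confirmed against the repository):

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	// Assumed import path for the generated stubs, matching the pb alias
	// used elsewhere in this diff (pkg/grpc/proto).
	pb "github.com/mudler/LocalAI/pkg/grpc/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// The backend listens on an insecure local port, mirroring
	// server.add_insecure_port(address) in the Python servicer.
	conn, err := grpc.Dial("localhost:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial: %v", err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// NewBackendClient/HealthMessage are assumptions based on the proto
	// service name "Backend" seen in backend_pb2_grpc above.
	client := pb.NewBackendClient(conn)
	reply, err := client.Health(ctx, &pb.HealthMessage{})
	if err != nil {
		log.Fatalf("health: %v", err)
	}
	// The Python handler replies with the bytes "OK".
	fmt.Printf("backend health: %s\n", reply.Message)
}
```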
@@ -12,5 +12,3 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
 fi
 
 installRequirements
-
-python -m unidic download
 backend/python/parler-tts/protogen.sh → backend/python/faster-whisper/protogen.sh | 0 (Executable file → Normal file)
 backend/python/faster-whisper/requirements-cpu.txt (new file) | 8
@@ -0,0 +1,8 @@
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
torch==2.4.1
optimum-quanto
 backend/python/faster-whisper/requirements-cublas11.txt (new file) | 9
@@ -0,0 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto
 backend/python/faster-whisper/requirements-cublas12.txt (new file) | 8
@@ -0,0 +1,8 @@
torch==2.4.1
faster-whisper
opencv-python
accelerate
compel
peft
sentencepiece
optimum-quanto
 backend/python/faster-whisper/requirements-hipblas.txt (new file) | 3
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
faster-whisper
@@ -1,8 +1,6 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch==2.3.110+xpu
 torch==2.3.1+cxx11.abi
 torchaudio==2.3.1+cxx11.abi
 oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-transformers
-accelerate
+faster-whisper
@@ -1,3 +1,3 @@
 grpcio==1.69.0
 protobuf
-certifi
+grpcio-tools
@@ -1,29 +0,0 @@
.PHONY: mamba
mamba: protogen
	bash install.sh

.PHONY: run
run: protogen
	@echo "Running mamba..."
	bash run.sh
	@echo "mamba run."

.PHONY: test
test: protogen
	@echo "Testing mamba..."
	bash test.sh
	@echo "mamba tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__
@@ -1,5 +0,0 @@
# Creating a separate environment for the mamba project

```
make mamba
```
@@ -1,179 +0,0 @@
#!/usr/bin/env python3
from concurrent import futures
import time
import argparse
import signal
import sys
import os

import backend_pb2
import backend_pb2_grpc

import grpc

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
MAMBA_CHAT = os.environ.get('MAMBA_CHAT', '1') == '1'

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer that implements the Backend service defined in backend.proto.
    """
    def generate(self, prompt, max_new_tokens):
        """
        Generates text based on the given prompt and maximum number of new tokens.

        Args:
            prompt (str): The prompt to generate text from.
            max_new_tokens (int): The maximum number of new tokens to generate.

        Returns:
            str: The generated text.
        """
        self.generator.end_beam_search()

        # Tokenizing the input
        ids = self.generator.tokenizer.encode(prompt)

        self.generator.gen_begin_reuse(ids)
        initial_len = self.generator.sequence[0].shape[0]
        has_leading_space = False
        decoded_text = ''
        for i in range(max_new_tokens):
            token = self.generator.gen_single_token()
            if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
                has_leading_space = True

            decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:])
            if has_leading_space:
                decoded_text = ' ' + decoded_text

            if token.item() == self.generator.tokenizer.eos_token_id:
                break
        return decoded_text

    def Health(self, request, context):
        """
        Returns a health check message.

        Args:
            request: The health check request.
            context: The gRPC context.

        Returns:
            backend_pb2.Reply: The health check reply.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        Loads a language model.

        Args:
            request: The load model request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The load model result.
        """
        try:
            tokenizerModel = request.Tokenizer
            if tokenizerModel == "":
                tokenizerModel = request.Model

            tokenizer = AutoTokenizer.from_pretrained(tokenizerModel)
            if MAMBA_CHAT:
                tokenizer.eos_token = "<|endoftext|>"
                tokenizer.pad_token = tokenizer.eos_token
            self.tokenizer = tokenizer
            self.model = MambaLMHeadModel.from_pretrained(request.Model, device="cuda", dtype=torch.float16)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters.

        Args:
            request: The predict request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict result.
        """
        if request.TopP == 0:
            request.TopP = 0.9

        max_tokens = request.Tokens

        if request.Tokens == 0:
            max_tokens = 2000

        # encoded_input = self.tokenizer(request.Prompt)
        tokens = self.tokenizer(request.Prompt, return_tensors="pt")
        input_ids = tokens.input_ids.to(device="cuda")
        out = self.model.generate(input_ids=input_ids, max_length=max_tokens, temperature=request.Temperature,
                                  top_p=request.TopP, eos_token_id=self.tokenizer.eos_token_id)

        decoded = self.tokenizer.batch_decode(out)

        generated_text = decoded[0]

        # Remove prompt from response if present
        if request.Prompt in generated_text:
            generated_text = generated_text.replace(request.Prompt, "")

        return backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))

    def PredictStream(self, request, context):
        """
        Generates text based on the given prompt and sampling parameters, and streams the results.

        Args:
            request: The predict stream request.
            context: The gRPC context.

        Returns:
            backend_pb2.Result: The predict stream result.
        """
        yield self.Predict(request, context)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()

    serve(args.addr)
@@ -1,9 +0,0 @@
#!/bin/bash
set -e

LIMIT_TARGETS="cublas"
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"

source $(dirname $0)/../common/libbackend.sh

installRequirements
@@ -1,2 +0,0 @@
causal-conv1d==1.4.0
mamba-ssm==2.2.2
@@ -1,2 +0,0 @@
torch==2.4.1
transformers
@@ -1,3 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
transformers
@@ -1,2 +0,0 @@
torch==2.4.1
transformers
@@ -1,6 +0,0 @@
# mabma does not specify it's build dependencies per PEP517, so we need to disable build isolation
# this also means that we need to install the basic build dependencies into the venv ourselves
# https://github.com/Dao-AILab/causal-conv1d/issues/24
packaging
setuptools
wheel
@@ -1,6 +0,0 @@
#!/bin/bash
LIMIT_TARGETS="cublas"

source $(dirname $0)/../common/libbackend.sh

startBackend $@
@@ -1,76 +0,0 @@
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc

import unittest
import subprocess
import time
import grpc
import backend_pb2_grpc
import backend_pb2

class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service.

    This class contains methods to test the startup and shutdown of the gRPC service.
    """
    def setUp(self):
        self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_text(self):
        """
        This method tests if the embeddings are generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/opt-125m"))
                self.assertTrue(response.success)
                req = backend_pb2.PredictOptions(Prompt="The capital of France is")
                resp = stub.Predict(req)
                self.assertIsNotNone(resp.message)
        except Exception as err:
            print(err)
            self.fail("text service failed")
        finally:
            self.tearDown()
@@ -1,158 +0,0 @@
#!/usr/bin/env python3
"""
Extra gRPC server for OpenVoice models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os
import torch
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
from melo.api import TTS

import time
import backend_pb2
import backend_pb2_grpc

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
        try:

            self.clonedVoice = False
            # Assume directory from request.ModelFile.
            # Only if request.LoraAdapter it's not an absolute path
            if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
                # get base path of modelFile
                modelFileBase = os.path.dirname(request.ModelFile)
                request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
            if request.AudioPath != "":
                self.clonedVoice = True

            self.modelpath = request.ModelFile
            self.speaker = request.Type
            self.ClonedVoicePath = request.AudioPath

            ckpt_converter = request.Model + '/converter'
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            self.device = device
            self.tone_color_converter = None
            if self.clonedVoice:
                self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
                self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        model_name = request.model
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        try:
            # Speed is adjustable
            speed = 1.0
            voice = "EN"
            if request.voice:
                voice = request.voice
            model = TTS(language=voice, device=self.device)
            speaker_ids = model.hps.data.spk2id
            speaker_key = self.speaker
            modelpath = self.modelpath
            for s in speaker_ids.keys():
                print(f"Speaker: {s} - ID: {speaker_ids[s]}")
            speaker_id = speaker_ids[speaker_key]
            speaker_key = speaker_key.lower().replace('_', '-')
            source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
            model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
            if self.clonedVoice:
                reference_speaker = self.ClonedVoicePath
                target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
                # Run the tone color converter
                encode_message = "@MyShell"
                self.tone_color_converter.convert(
                    audio_src_path=request.dst,
                    src_se=source_se,
                    tgt_se=target_se,
                    output_path=request.dst,
                    message=encode_message)

            print("[OpenVoice] TTS generated!", file=sys.stderr)
            print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("[OpenVoice] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()
    print(f"[OpenVoice] startup: {args}", file=sys.stderr)
    serve(args.addr)
@@ -1,7 +0,0 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,8 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,7 +0,0 @@
torch==2.4.1
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,8 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.4.1+rocm6.0
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
whisper-timestamped
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
@@ -1,24 +0,0 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch==2.3.110+xpu
torch==2.3.1+cxx11.abi
torchaudio==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
grpcio==1.69.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
@@ -1,17 +0,0 @@
grpcio==1.69.0
protobuf
librosa
faster-whisper
inflect
unidecode
openai
python-dotenv
pypinyin
cn2an==0.5.22
numpy==1.22.0
networkx==2.8.8
jieba==0.42.1
gradio==5.9.1
langid==1.1.6
llvmlite==0.43.0
setuptools
@@ -1,82 +0,0 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(30)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
                                                                   Type="en-us"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_tts(self):
        """
        This method tests if the embeddings are generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")
        finally:
            self.tearDown()
@ -1,12 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
    wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
    unzip checkpoints_v2.zip
fi

runUnittests
@ -1,44 +0,0 @@
export CONDA_ENV_PATH = "parler.yml"
SKIP_CONDA?=0
ifeq ($(BUILD_TYPE), cublas)
export CONDA_ENV_PATH = "parler-nvidia.yml"
endif

# Intel GPU are supposed to have dependencies installed in the main python
# environment, so we skip conda installation for SYCL builds.
# https://github.com/intel/intel-extension-for-pytorch/issues/538
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
export SKIP_CONDA=1
endif

.PHONY: parler-tts
parler-tts:
	@echo "Installing $(CONDA_ENV_PATH)..."
	bash install.sh $(CONDA_ENV_PATH)
	$(MAKE) protogen

.PHONY: run
run: protogen
	@echo "Running transformers..."
	bash run.sh
	@echo "transformers run."

.PHONY: test
test: protogen
	@echo "Testing transformers..."
	bash test.sh
	@echo "transformers tested."

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	bash protogen.sh

.PHONY: clean
clean: protogen-clean
	$(RM) -r venv __pycache__
@ -1,125 +0,0 @@
#!/usr/bin/env python3
"""
Extra gRPC server for MusicgenForConditionalGeneration models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os

import time
import backend_pb2
import backend_pb2_grpc

import grpc

from scipy.io.wavfile import write as write_wav

from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import soundfile as sf
import torch

_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        try:
            self.model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        model_name = request.model
        voice = request.voice
        if voice == "":
            voice = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        try:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            input_ids = self.tokenizer(voice, return_tensors="pt").input_ids.to(device)
            prompt_input_ids = self.tokenizer(request.text, return_tensors="pt").input_ids.to(device)

            generation = self.model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
            audio_arr = generation.cpu().numpy().squeeze()
            print("[parler-tts] TTS generated!", file=sys.stderr)
            sf.write(request.dst, audio_arr, self.model.config.sampling_rate)
            print("[parler-tts] TTS saved to", request.dst, file=sys.stderr)
            print("[parler-tts] TTS for", file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)


def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[parler-tts] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("[parler-tts] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()
    print(f"[parler-tts] startup: {args}", file=sys.stderr)
    serve(args.addr)
@ -1,28 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

# https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions.
PYDIR=python3.10
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"

if [ ! -d ${pyenv} ]; then
    echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
    exit 1
fi

curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
@ -1,4 +0,0 @@
git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17
llvmlite==0.43.0
numba==0.60.0
grpcio-tools==1.42.0
@ -1,3 +0,0 @@
transformers
accelerate
torch==2.4.1
@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
torchaudio==2.4.1+cu118
transformers
accelerate
@ -1,4 +0,0 @@
torch==2.4.1
torchaudio==2.4.1
transformers
accelerate
@ -1,5 +0,0 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch==2.3.0+rocm6.0
torchaudio==2.3.0+rocm6.0
transformers
accelerate
@ -1,4 +0,0 @@
grpcio==1.69.0
certifi
llvmlite==0.43.0
setuptools
@ -1,4 +0,0 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

startBackend $@
@ -1,81 +0,0 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_tts(self):
        """
        This method tests if TTS audio is generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="parler-tts/parler_tts_mini_v0.1"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="Hey, how are you doing today?")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")
        finally:
            self.tearDown()
@ -1,6 +0,0 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

runUnittests
@ -21,7 +21,7 @@ import torch.cuda


XPU=os.environ.get("XPU", "0") == "1"
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from scipy.io import wavfile
import outetts
@ -245,6 +245,10 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
            autoTokenizer = False
            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
            self.SentenceTransformer = True
        elif request.Type == "Mamba":
            autoTokenizer = False
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = MambaForCausalLM.from_pretrained(model_name)
        else:
            print("Automodel", file=sys.stderr)
            self.model = AutoModel.from_pretrained(model_name,
@ -1,7 +1,8 @@
torch==2.4.1
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers
bitsandbytes
outetts
sentence-transformers==3.3.1
sentence-transformers==3.3.1
@ -1,6 +1,7 @@
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.4.1+cu118
llvmlite==0.43.0
numba==0.60.0
accelerate
transformers
bitsandbytes
@ -1,6 +1,7 @@
torch==2.4.1
accelerate
llvmlite==0.43.0
numba==0.60.0
transformers
bitsandbytes
outetts
@ -3,6 +3,7 @@ torch==2.4.1+rocm6.0
accelerate
transformers
llvmlite==0.43.0
numba==0.60.0
bitsandbytes
outetts
bitsandbytes
@ -4,6 +4,7 @@ torch==2.3.1+cxx11.abi
oneccl_bind_pt==2.3.100+xpu
optimum[openvino]
llvmlite==0.43.0
numba==0.60.0
intel-extension-for-transformers
bitsandbytes
outetts
@ -3,5 +3,4 @@ protobuf
certifi
setuptools
scipy==1.15.1
numpy>=2.0.0
numba==0.60.0
numpy>=2.0.0
@ -515,7 +515,7 @@ func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
		}
	}
	if (u & FLAG_IMAGE) == FLAG_IMAGE {
		imageBackends := []string{"diffusers", "stablediffusion"}
		imageBackends := []string{"diffusers", "stablediffusion", "stablediffusion-ggml"}
		if !slices.Contains(imageBackends, c.Backend) {
			return false
		}
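For orientation, here is a minimal, self-contained sketch of the bitmask idiom that GuessUsecases relies on in the hunk above. The flag names below are stand-ins invented for illustration, not the repo's actual constants:

package main

import "fmt"

// usecases mimics BackendConfigUsecases; flagChat/flagImage are stand-ins
// for the repo's FLAG_* constants.
type usecases int

const (
	flagChat  usecases = 1 << iota // stand-in for FLAG_CHAT
	flagImage                      // stand-in for FLAG_IMAGE
)

// supports mirrors the `(u & FLAG_IMAGE) == FLAG_IMAGE` check above.
func supports(have, want usecases) bool {
	return have&want == want
}

func main() {
	modelCaps := flagChat | flagImage
	fmt.Println(supports(modelCaps, flagImage)) // true
	fmt.Println(supports(flagChat, flagImage))  // false
}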
@ -48,5 +48,66 @@ var _ = Describe("Test cases for config related functions", func() {
			// config should includes whisper-1 models's api.config
			Expect(loadedModelNames).To(ContainElements("whisper-1"))
		})

		It("Test new loadconfig", func() {

			bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
			err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
			Expect(err).To(BeNil())
			configs := bcl.GetAllBackendConfigs()
			loadedModelNames := []string{}
			for _, v := range configs {
				loadedModelNames = append(loadedModelNames, v.Name)
			}
			Expect(configs).ToNot(BeNil())
			totalModels := len(loadedModelNames)

			Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))

			// config should includes text-embedding-ada-002 models's api.config
			Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))

			// config should includes rwkv_test models's api.config
			Expect(loadedModelNames).To(ContainElements("rwkv_test"))

			// config should includes whisper-1 models's api.config
			Expect(loadedModelNames).To(ContainElements("whisper-1"))

			// create a temp directory and store a temporary model
			tmpdir, err := os.MkdirTemp("", "test")
			Expect(err).ToNot(HaveOccurred())
			defer os.RemoveAll(tmpdir)

			// create a temporary model
			model := `name: "test-model"
description: "test model"
options:
- foo
- bar
- baz
`
			modelFile := tmpdir + "/test-model.yaml"
			err = os.WriteFile(modelFile, []byte(model), 0644)
			Expect(err).ToNot(HaveOccurred())

			err = bcl.LoadBackendConfigsFromPath(tmpdir)
			Expect(err).ToNot(HaveOccurred())

			configs = bcl.GetAllBackendConfigs()
			Expect(len(configs)).ToNot(Equal(totalModels))

			loadedModelNames = []string{}
			var testModel BackendConfig
			for _, v := range configs {
				loadedModelNames = append(loadedModelNames, v.Name)
				if v.Name == "test-model" {
					testModel = v
				}
			}
			Expect(loadedModelNames).To(ContainElements("test-model"))
			Expect(testModel.Description).To(Equal("test model"))
			Expect(testModel.Options).To(ContainElements("foo", "bar", "baz"))

		})
	})
})
@ -687,6 +687,10 @@ var _ = Describe("API test", func() {
					Name: "model-gallery",
					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
				},
				{
					Name: "localai",
					URL:  "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/gallery/index.yaml",
				},
			}

			application, err := application.New(
@ -764,10 +768,8 @@ var _ = Describe("API test", func() {
			}

			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
				ID: "model-gallery@stablediffusion",
				Overrides: map[string]interface{}{
					"parameters": map[string]interface{}{"model": "stablediffusion_assets"},
				},
				ID:   "localai@sd-1.5-ggml",
				Name: "stablediffusion",
			})

			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
@ -778,14 +780,14 @@ var _ = Describe("API test", func() {
				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
				fmt.Println(response)
				return response["processed"].(bool)
			}, "360s", "10s").Should(Equal(true))
			}, "1200s", "10s").Should(Equal(true))

			resp, err := http.Post(
				"http://127.0.0.1:9090/v1/images/generations",
				"application/json",
				bytes.NewBuffer([]byte(`{
					"prompt": "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful|((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text",
					"mode": 2, "seed":9000,
					"prompt": "a lovely cat",
					"step": 1, "seed":9000,
					"size": "256x256", "n":2}`)))
			// The response should contain an URL
			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
@ -794,6 +796,7 @@ var _ = Describe("API test", func() {

			imgUrlResp := &schema.OpenAIResponse{}
			err = json.Unmarshal(dat, imgUrlResp)
			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(dat))
			Expect(imgUrlResp.Data).ToNot(Or(BeNil(), BeZero()))
			imgUrl := imgUrlResp.Data[0].URL
			Expect(imgUrl).To(ContainSubstring("http://127.0.0.1:9090/"), imgUrl)
|
||||
}
|
||||
}
|
||||
|
||||
// BackendMonitorEndpoint shuts down the specified backend
|
||||
// BackendShutdownEndpoint shuts down the specified backend
|
||||
// @Summary Backend monitor endpoint
|
||||
// @Param request body schema.BackendMonitorRequest true "Backend statistics request"
|
||||
// @Router /backend/shutdown [post]
|
||||
|
@ -72,7 +72,7 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
		}

		if m == "" {
			m = model.StableDiffusionBackend
			m = "stablediffusion"
		}
		log.Debug().Msgf("Loading model: %+v", m)

@ -129,9 +129,9 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {

		switch config.Backend {
		case "stablediffusion":
			config.Backend = model.StableDiffusionBackend
			config.Backend = model.StableDiffusionGGMLBackend
		case "":
			config.Backend = model.StableDiffusionBackend
			config.Backend = model.StableDiffusionGGMLBackend
		}

		if !strings.Contains(input.Size, "x") {
@ -4,6 +4,7 @@ import (
	"context"
	"encoding/json"
	"fmt"
	"strconv"

	"github.com/gofiber/fiber/v2"
	"github.com/google/uuid"
@ -296,6 +297,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIRequest) {
			}
		}
	}

	// If a quality was defined as a number, convert it to step
	if input.Quality != "" {
		q, err := strconv.Atoi(input.Quality)
		if err == nil {
			config.Step = q
		}
	}
}

func mergeRequestWithConfig(modelFile string, input *schema.OpenAIRequest, cm *config.BackendConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.BackendConfig, *schema.OpenAIRequest, error) {
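To make the conversion above concrete, here is a small runnable sketch of the same strconv.Atoi-based mapping; the quality values are made up for illustration:

package main

import (
	"fmt"
	"strconv"
)

// Numeric quality strings become Step; non-numeric values are silently
// ignored, exactly as in updateRequestConfig above.
func main() {
	for _, quality := range []string{"25", "hd"} {
		if q, err := strconv.Atoi(quality); err == nil {
			fmt.Println("Step =", q) // "25" -> Step = 25
		} else {
			fmt.Println("ignored non-numeric quality:", quality) // "hd"
		}
	}
}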
@ -191,8 +191,9 @@ type OpenAIRequest struct {
	Stream bool `json:"stream"`

	// Image (not supported by OpenAI)
	Mode int `json:"mode"`
	Step int `json:"step"`
	Mode    int    `json:"mode"`
	Quality string `json:"quality"`
	Step    int    `json:"step"`

	// A grammar to constrain the LLM output
	Grammar string `json:"grammar" yaml:"grammar"`
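With the new Quality field, an image request along the following lines now round-trips; the endpoint and payload shape mirror the API test earlier in this diff, the values themselves are illustrative, and a numeric quality string is translated into Step server-side by updateRequestConfig:

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

// Illustrative request against a local LocalAI instance.
func main() {
	body := []byte(`{"model": "stablediffusion", "prompt": "a lovely cat",
		"size": "256x256", "step": 1, "quality": "25", "seed": 9000}`)
	resp, err := http.Post("http://127.0.0.1:9090/v1/images/generations",
		"application/json", bytes.NewBuffer(body))
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}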
2 docs/themes/hugo-theme-relearn vendored
@ -1 +1 @@
Subproject commit 80e448e5bdaa92c87ee0d0d86f1125c8606ebf5f
Subproject commit 8dad5ee419e5bb2a0b380aa72d7a7389af4945f6
@ -2,6 +2,7 @@
- &phi4
  url: "github:mudler/LocalAI/gallery/phi-4-chat.yaml@master"
  name: "phi-4"
  icon: https://avatars.githubusercontent.com/u/6154722
  license: mit
  tags:
    - llm
@ -189,7 +190,7 @@
    - https://huggingface.co/Nitral-AI/NightWing3-10B-v0.1
    - https://huggingface.co/bartowski/NightWing3-10B-v0.1-GGUF
  description: |
    Base model: (Falcon3-10B)
    Base model: (Falcon3-10B)
  overrides:
    parameters:
      model: NightWing3-10B-v0.1-Q4_K_M.gguf
@ -224,7 +225,7 @@
      uri: huggingface://bartowski/INTELLECT-1-Instruct-GGUF/INTELLECT-1-Instruct-Q4_K_M.gguf
- &llama33
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
  icon: https://avatars.githubusercontent.com/u/153379578
  license: llama3.3
  description: |
    The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.
@ -418,9 +419,47 @@
    - filename: L3.3-MS-Nevoria-70b-Q4_K_M.gguf
      sha256: e8b0763f263089a19d4b112b7ed5085cc5f1ed9ca49c5085baa8d51f4ded1f94
      uri: huggingface://bartowski/L3.3-MS-Nevoria-70b-GGUF/L3.3-MS-Nevoria-70b-Q4_K_M.gguf
- !!merge <<: *llama33
  name: "l3.3-70b-magnum-v4-se"
  urls:
    - https://huggingface.co/Doctor-Shotgun/L3.3-70B-Magnum-v4-SE
    - https://huggingface.co/bartowski/L3.3-70B-Magnum-v4-SE-GGUF
  description: |
    The Magnum v4 series is complete, but here's something a little extra I wanted to tack on as I wasn't entirely satisfied with the results of v4 72B. "SE" for Special Edition - this model is finetuned from meta-llama/Llama-3.3-70B-Instruct as an rsLoRA adapter. The dataset is a slightly revised variant of the v4 data with some elements of the v2 data re-introduced.

    The objective, as with the other Magnum models, is to emulate the prose style and quality of the Claude 3 Sonnet/Opus series of models on a local scale, so don't be surprised to see "Claude-isms" in its output.
  overrides:
    parameters:
      model: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
  files:
    - filename: L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
      sha256: 9724a6364a42caa3d5a1687258eb329c9af6cbb2ce01c8dd556c1a222a2e0352
      uri: huggingface://bartowski/L3.3-70B-Magnum-v4-SE-GGUF/L3.3-70B-Magnum-v4-SE-Q4_K_M.gguf
- !!merge <<: *llama33
  name: "l3.3-prikol-70b-v0.2"
  icon: https://files.catbox.moe/x9t3zo.png
  urls:
    - https://huggingface.co/Nohobby/L3.3-Prikol-70B-v0.2
    - https://huggingface.co/bartowski/L3.3-Prikol-70B-v0.2-GGUF
  description: |
    A merge of some Llama 3.3 models because um uh yeah

    Went extra schizo on the recipe, hoping for an extra fun result, and... Well, I guess it's an overall improvement over the previous revision. It's a tiny bit smarter, has even more distinct swipes and nice dialogues, but for some reason it's damn sloppy.

    I've published the second step of this merge as a separate model, and I'd say the results are more interesting, but not as usable as this one. https://huggingface.co/Nohobby/AbominationSnowPig

    Prompt format: Llama3 OR Llama3 Context and ChatML Instruct. It actually works a bit better this way
  overrides:
    parameters:
      model: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
  files:
    - filename: L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
      sha256: fc0ff514efbc0b67981c2bf1423d5a2e1b8801e4266ba0c653ea148414fe5ffc
      uri: huggingface://bartowski/L3.3-Prikol-70B-v0.2-GGUF/L3.3-Prikol-70B-v0.2-Q4_K_M.gguf
- &rwkv
  url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
  name: "rwkv-6-world-7b"
  icon: https://avatars.githubusercontent.com/u/132652788
  license: apache-2.0
  urls:
    - https://huggingface.co/RWKV/rwkv-6-world-7b
@ -443,6 +482,7 @@
      uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
- &qwen25coder
  name: "qwen2.5-coder-14b"
  icon: https://avatars.githubusercontent.com/u/141221163
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  license: apache-2.0
  tags:
@ -628,7 +668,7 @@
      uri: huggingface://mradermacher/Qwen2.5-Coder-32B-Instruct-Uncensored-i1-GGUF/Qwen2.5-Coder-32B-Instruct-Uncensored.i1-Q4_K_M.gguf
- &opencoder
  name: "opencoder-8b-base"
  icon: https://github.com/OpenCoder-llm/opencoder-llm.github.io/blob/main/static/images/opencoder_icon.jpg?raw=true
  icon: https://avatars.githubusercontent.com/u/186387526
  url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
  urls:
    - https://huggingface.co/infly/OpenCoder-8B-Base
@ -694,6 +734,7 @@
      uri: huggingface://QuantFactory/OpenCoder-1.5B-Instruct-GGUF/OpenCoder-1.5B-Instruct.Q4_K_M.gguf
- &granite3
  name: "granite-3.0-1b-a400m-instruct"
  icon: https://avatars.githubusercontent.com/u/167822367
  urls:
    - https://huggingface.co/ibm-granite/granite-3.0-1b-a400m-instruct
    - https://huggingface.co/QuantFactory/granite-3.0-1b-a400m-instruct-GGUF
@ -778,10 +819,9 @@
    - filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
      sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
      uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
- &llama32
  ## llama3.2
- &llama32 ## llama3.2
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
  icon: https://avatars.githubusercontent.com/u/153379578
  license: llama3.2
  description: |
    The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
@ -950,7 +990,6 @@
      uri: huggingface://mradermacher/Llama-3.2-3B-Reasoning-Time-GGUF/Llama-3.2-3B-Reasoning-Time.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama-3.2-sun-2.5b-chat"
  icon: https://i.ibb.co/PF0TdMJ/imagine-image-9a56cee7-0f4f-4cc2-b265-a5b8d04f266b.png
  urls:
    - https://huggingface.co/meditsolutions/Llama-3.2-SUN-2.5B-chat
    - https://huggingface.co/mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF
@ -982,7 +1021,6 @@
      uri: huggingface://mradermacher/Llama-3.2-SUN-2.5B-chat-GGUF/Llama-3.2-SUN-2.5B-chat.Q4_K_M.gguf
- !!merge <<: *llama32
  name: "llama-3.2-3b-instruct-uncensored"
  icon: https://i.imgur.com/JOePyAN.png
  urls:
    - https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF
    - https://huggingface.co/chuanli11/Llama-3.2-3B-Instruct-uncensored
@ -1316,9 +1354,9 @@
    - filename: FineMath-Llama-3B-Q4_K_M.gguf
      sha256: 16c73b5cf2a417a7e1608bcc9469f1461fc3e759ce04a3a337f48df977dc158c
      uri: huggingface://bartowski/FineMath-Llama-3B-GGUF/FineMath-Llama-3B-Q4_K_M.gguf
- &qwen25
  ## Qwen2.5
- &qwen25 ## Qwen2.5
  name: "qwen2.5-14b-instruct"
  icon: https://avatars.githubusercontent.com/u/141221163
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  license: apache-2.0
  description: |
@ -1608,6 +1646,7 @@
      uri: huggingface://bartowski/qwen2.5-7b-ins-v3-GGUF/qwen2.5-7b-ins-v3-Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "supernova-medius"
  icon: https://avatars.githubusercontent.com/u/126496414
  urls:
    - https://huggingface.co/arcee-ai/SuperNova-Medius-GGUF
  description: |
@ -1762,7 +1801,7 @@
      uri: huggingface://bartowski/TheBeagle-v2beta-32B-MGS-GGUF/TheBeagle-v2beta-32B-MGS-Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "meraj-mini"
  icon: https://i.ibb.co/CmPSSpq/Screenshot-2024-10-06-at-9-45-06-PM.png
  icon: https://avatars.githubusercontent.com/u/126496414
  urls:
    - https://huggingface.co/arcee-ai/Meraj-Mini
    - https://huggingface.co/QuantFactory/Meraj-Mini-GGUF
@ -2142,7 +2181,6 @@
      sha256: 42cf7a96784dc8f25c61c2404620c3e6548a024caa8dff6e435d7c86400d7ab8
      uri: huggingface://mradermacher/Qwen2.5-7B-nerd-uncensored-v1.7-GGUF/Qwen2.5-7B-nerd-uncensored-v1.7.Q4_K_M.gguf
- !!merge <<: *qwen25
  icon: https://i.imgur.com/OxX2Usi.png
  name: "evathene-v1.0"
  urls:
    - https://huggingface.co/sophosympatheia/Evathene-v1.0
@ -2392,7 +2430,7 @@
      uri: huggingface://QuantFactory/Math-IIO-7B-Instruct-GGUF/Math-IIO-7B-Instruct.Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "virtuoso-small"
  icon: https://i.ibb.co/pXD6Bcv/SW2-U-g-QQLSH1-ZAbxhs-Iu-A.webp
  icon: https://avatars.githubusercontent.com/u/126496414
  urls:
    - https://huggingface.co/arcee-ai/Virtuoso-Small-GGUF
  description: |
@ -2501,7 +2539,6 @@
      sha256: 91907f29746625a62885793475956220b81d8a5a34b53686a1acd1d03fd403ea
      uri: huggingface://bartowski/72B-Qwen2.5-Kunou-v1-GGUF/72B-Qwen2.5-Kunou-v1-Q4_K_M.gguf
- !!merge <<: *qwen25
  icon: https://i.imgur.com/OxX2Usi.png
  name: "evathene-v1.3"
  urls:
    - https://huggingface.co/sophosympatheia/Evathene-v1.3
@ -2659,6 +2696,39 @@
    - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf
      sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615
      uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "deepseek-r1-distill-qwen-1.5b"
  icon: "https://avatars.githubusercontent.com/u/148330874"
  urls:
    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
    - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
  description: |
    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
  overrides:
    parameters:
      model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
  files:
    - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
      sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe
      uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "deepseek-r1-distill-qwen-7b"
  urls:
    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
  description: |
    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
  overrides:
    parameters:
      model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
  files:
    - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
      sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
- &archfunct
  license: apache-2.0
  tags:
@ -2670,6 +2740,7 @@
    - cpu
    - function-calling
  name: "arch-function-1.5b"
  icon: https://avatars.githubusercontent.com/u/112724757
  uri: "github:mudler/LocalAI/gallery/arch-function.yaml@master"
  urls:
    - https://huggingface.co/katanemolabs/Arch-Function-1.5B
@ -3109,7 +3180,7 @@
      uri: huggingface://bartowski/Rombos-Qwen2.5-Writer-32b-GGUF/Rombos-Qwen2.5-Writer-32b-Q4_K_M.gguf
- !!merge <<: *qwen25
  name: "sky-t1-32b-preview"
  icon: https://raw.githubusercontent.com/NovaSky-AI/novasky-ai.github.io/main/assets/images/blue-bird-wider.jpeg
  icon: https://github.com/NovaSky-AI/novasky-ai.github.io/raw/main/assets/images/blue-bird-wider.jpeg
  urls:
    - https://huggingface.co/NovaSky-AI/Sky-T1-32B-Preview
    - https://huggingface.co/bartowski/Sky-T1-32B-Preview-GGUF
@ -3236,8 +3307,7 @@
    - filename: DRT-o1-14B-Q4_K_M.gguf
      sha256: 9619ca984cf4ce8e4f69bcde831de17b2ce05dd89536e3130608877521e3d328
      uri: huggingface://bartowski/DRT-o1-14B-GGUF/DRT-o1-14B-Q4_K_M.gguf
- &smollm
  ## SmolLM
- &smollm ## SmolLM
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "smollm-1.7b-instruct"
  icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
@ -3295,10 +3365,9 @@
    - filename: Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
      sha256: eaeac314e30b461413bc1cc819cdc0cd6a79265711fd0b8268702960a082c7bd
      uri: huggingface://QuantFactory/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF/Vikhr-Qwen-2.5-1.5B-Instruct.Q4_K_M.gguf
- &llama31
  ## LLama3.1
- &llama31 ## LLama3.1
  url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
  icon: https://avatars.githubusercontent.com/u/153379578
  name: "meta-llama-3.1-8b-instruct"
  license: llama3.1
  description: |
@ -3387,7 +3456,7 @@
      sha256: 6d175432f66d10dfed9737f73a5073d513d18e1ee7bd4b9cf2a59deb359f36ff
- !!merge <<: *llama31
  name: "meta-llama-3.1-8b-instruct-abliterated"
  icon: https://i.imgur.com/KhorYYG.png
  icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/AsTgL8VCgMHgobq4cr46b.png
  urls:
    - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
    - https://huggingface.co/mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated-GGUF
@ -3416,7 +3485,7 @@
      uri: huggingface://mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf/Llama-3.1-70B-Japanese-Instruct-2407-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "openbuddy-llama3.1-8b-v22.1-131k"
  icon: https://raw.githubusercontent.com/OpenBuddy/OpenBuddy/main/media/demo.png
  icon: https://github.com/OpenBuddy/OpenBuddy/raw/main/media/demo.png
  urls:
    - https://huggingface.co/sunnyyy/openbuddy-llama3.1-8b-v22.1-131k-Q4_K_M-GGUF
  description: |
@ -3592,7 +3661,7 @@
      sha256: 6557c5d5091f2507d19ab1f8bfb9ceb4e1536a755ab70f148b18aeb33741580f
      uri: huggingface://mradermacher/Llama-3.1-Techne-RP-8b-v1-GGUF/Llama-3.1-Techne-RP-8b-v1.Q4_K_M.gguf
- !!merge <<: *llama31
  icon: https://i.ibb.co/9hwFrvL/BLMs-Wkx-NQf-W-46-FZDg-ILhg.jpg
  icon: https://avatars.githubusercontent.com/u/126496414
  name: "llama-spark"
  urls:
    - https://huggingface.co/arcee-ai/Llama-Spark
@ -3710,7 +3779,6 @@
- !!merge <<: *llama31
  name: "llama-3.1-supernova-lite-reflection-v1.0-i1"
  url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master"
  icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
  urls:
    - https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0
    - https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF
@ -3725,7 +3793,7 @@
      uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "llama-3.1-supernova-lite"
  icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
  icon: https://avatars.githubusercontent.com/u/126496414
  urls:
    - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite
    - https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF
@ -4239,6 +4307,7 @@
      uri: huggingface://mradermacher/Hermes-3-Llama-3.1-70B-lorablated-GGUF/Hermes-3-Llama-3.1-70B-lorablated.Q4_K_M.gguf
- !!merge <<: *llama31
  name: "hermes-3-llama-3.1-8b-lorablated"
  icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/4Hbw5n68jKUSBQeTqQIeT.png
  urls:
    - https://huggingface.co/mlabonne/Hermes-3-Llama-3.1-8B-lorablated-GGUF
  description: |
@ -4447,7 +4516,6 @@
      sha256: 27b10c3ca4507e8bf7d305d60e5313b54ef5fffdb43a03f36223d19d906e39f3
      uri: huggingface://mradermacher/L3.1-70Blivion-v0.1-rc1-70B-i1-GGUF/L3.1-70Blivion-v0.1-rc1-70B.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  icon: https://i.imgur.com/sdN0Aqg.jpeg
  name: "llama-3.1-hawkish-8b"
  urls:
    - https://huggingface.co/mukaj/Llama-3.1-Hawkish-8B
@ -5184,11 +5252,27 @@
    - filename: Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
      sha256: 268390e07edd407ad93ea21a868b7ae995b5950e01cad0db9e1802ae5049d405
      uri: huggingface://bartowski/Dolphin3.0-Llama3.1-8B-GGUF/Dolphin3.0-Llama3.1-8B-Q4_K_M.gguf
- &deepseek
  ## Deepseek
- !!merge <<: *llama31
  name: "deepseek-r1-distill-llama-8b"
  icon: "https://avatars.githubusercontent.com/u/148330874"
  urls:
    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
    - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF
  description: |
    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
  overrides:
    parameters:
      model: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
  files:
    - filename: deepseek-r1-distill-llama-8b-Q4_K_M.gguf
      sha256: f8eba201522ab44b79bc54166126bfaf836111ff4cbf2d13c59c3b57da10573b
      uri: huggingface://unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
- &deepseek ## Deepseek
  url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
  name: "deepseek-coder-v2-lite-instruct"
  icon: "https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true"
  icon: "https://avatars.githubusercontent.com/u/148330874"
  license: deepseek
  description: |
    DeepSeek-Coder-V2, an open-source Mixture-of-Experts (MoE) code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks. Specifically, DeepSeek-Coder-V2 is further pre-trained from DeepSeek-Coder-V2-Base with 6 trillion tokens sourced from a high-quality and multi-source corpus. Through this continued pre-training, DeepSeek-Coder-V2 substantially enhances the coding and mathematical reasoning capabilities of DeepSeek-Coder-V2-Base, while maintaining comparable performance in general language tasks. Compared to DeepSeek-Coder, DeepSeek-Coder-V2 demonstrates significant advancements in various aspects of code-related tasks, as well as reasoning and general capabilities. Additionally, DeepSeek-Coder-V2 expands its support for programming languages from 86 to 338, while extending the context length from 16K to 128K.
@ -5250,10 +5334,10 @@
    - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
      sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
      uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
- &qwen2
  ## Start QWEN2
- &qwen2 ## Start QWEN2
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "qwen2-7b-instruct"
  icon: https://avatars.githubusercontent.com/u/141221163
  license: apache-2.0
  description: |
    Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 7B Qwen2 model.
@ -5360,7 +5444,7 @@
      uri: huggingface://bartowski/Einstein-v7-Qwen2-7B-GGUF/Einstein-v7-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "arcee-spark"
  icon: https://i.ibb.co/80ssNWS/o-Vdk-Qx-ARNmzr-Pi1h-Efj-SA.webp
  icon: https://avatars.githubusercontent.com/u/126496414
  description: |
    Arcee Spark is a powerful 7B parameter language model that punches well above its weight class. Initialized from Qwen2, this model underwent a sophisticated training process:

@ -5398,7 +5482,7 @@
      uri: huggingface://Hercules-5.0-Qwen2-7B-Q4_K_M.gguf/Hercules-5.0-Qwen2-7B-Q4_K_M.gguf
- !!merge <<: *qwen2
  name: "arcee-agent"
  icon: https://i.ibb.co/CBHmTDn/136719a5-6d8a-4654-a618-46eabc788953.jpg
  icon: https://avatars.githubusercontent.com/u/126496414
  description: |
    Arcee Agent is a cutting-edge 7B parameter language model specifically designed for function calling and tool use. Initialized from Qwen2-7B, it rivals the performance of much larger models while maintaining efficiency and speed. This model is particularly suited for developers, researchers, and businesses looking to implement sophisticated AI-driven solutions without the computational overhead of larger language models. Compute for training Arcee-Agent was provided by CrusoeAI. Arcee-Agent was trained using Spectrum.
  urls:
@ -5583,8 +5667,34 @@
    - filename: marco-o1-uncensored.Q4_K_M.gguf
      sha256: ad0440270a7254098f90779744d3e5b34fe49b7baf97c819909ba9c5648cc0d9
      uri: huggingface://QuantFactory/marco-o1-uncensored-GGUF/marco-o1-uncensored.Q4_K_M.gguf
- &mistral03
  ## START Mistral
- !!merge <<: *qwen2
  name: "minicpm-v-2_6"
  license: apache-2.0
  icon: https://avatars.githubusercontent.com/u/89920203
  urls:
    - https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
    - https://huggingface.co/openbmb/MiniCPM-V-2_6
  description: |
    MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
  tags:
    - llm
    - multimodal
    - gguf
    - gpu
    - qwen2
    - cpu
  overrides:
    mmproj: minicpm-v-2_6-mmproj-f16.gguf
    parameters:
      model: minicpm-v-2_6-Q4_K_M.gguf
  files:
    - filename: minicpm-v-2_6-Q4_K_M.gguf
      sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
      uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
    - filename: minicpm-v-2_6-mmproj-f16.gguf
      sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
      uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
- &mistral03 ## START Mistral
  url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
  name: "mistral-7b-instruct-v0.3"
  icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
@ -6092,7 +6202,6 @@
- !!merge <<: *mistral03
  name: "mn-12b-mag-mell-r1-iq-arm-imatrix"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: "https://i.imgur.com/wjyAaTO.png"
  urls:
    - https://huggingface.co/inflatebot/MN-12B-Mag-Mell-R1
    - https://huggingface.co/Lewdiculous/MN-12B-Mag-Mell-R1-GGUF-IQ-ARM-Imatrix
@ -6189,8 +6298,35 @@
    - filename: Nera_Noctis-12B-Q4_K_M.gguf
      sha256: 0662a9a847adde046e6255c15d5a677ebf09ab00841547c8963668d14baf00ff
      uri: huggingface://bartowski/Nera_Noctis-12B-GGUF/Nera_Noctis-12B-Q4_K_M.gguf
- &mudler
  ### START mudler's LocalAI specific-models
- !!merge <<: *mistral03
  name: "wayfarer-12b"
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: https://huggingface.co/LatitudeGames/Wayfarer-12B/resolve/main/wayfarer.jpg
  urls:
    - https://huggingface.co/LatitudeGames/Wayfarer-12B
    - https://huggingface.co/bartowski/Wayfarer-12B-GGUF
  description: |
    We’ve heard over and over from AI Dungeon players that modern AI models are too nice, never letting them fail or die. While it may be good for a chatbot to be nice and helpful, great stories and games aren’t all rainbows and unicorns. They have conflict, tension, and even death. These create real stakes and consequences for characters and the journeys they go on.

    Similarly, great games need opposition. You must be able to fail, die, and may even have to start over. This makes games more fun!

    However, the vast majority of AI models, through alignment RLHF, have been trained away from darkness, violence, or conflict, preventing them from fulfilling this role. To give our players better options, we decided to train our own model to fix these issues.

    Wayfarer is an adventure role-play model specifically trained to give players a challenging and dangerous experience. We thought they would like it, but since releasing it on AI Dungeon, players have reacted even more positively than we expected.

    Because they loved it so much, we’ve decided to open-source the model so anyone can experience unforgivingly brutal AI adventures! Anyone can download the model to run locally.

    Or if you want to easily try this model for free, you can do so at https://aidungeon.com.

    We plan to continue improving and open-sourcing similar models, so please share any and all feedback on how we can improve model behavior. Below we share more details on how Wayfarer was created.
  overrides:
    parameters:
      model: Wayfarer-12B-Q4_K_M.gguf
  files:
    - filename: Wayfarer-12B-Q4_K_M.gguf
      sha256: 6cd9f290c820c64854fcdcfd312b066447acc2f63abe2e2e71af9bc4f1946c08
      uri: huggingface://bartowski/Wayfarer-12B-GGUF/Wayfarer-12B-Q4_K_M.gguf
- &mudler ### START mudler's LocalAI specific-models
  url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
  name: "LocalAI-llama3-8b-function-call-v0.2"
  icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp"
@ -6235,8 +6371,7 @@
    - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
      sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec
      uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin
- &parler-tts
  ### START parler-tts
- &parler-tts ### START parler-tts
  url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master"
  name: parler-tts-mini-v0.1
  overrides:
@ -6253,8 +6388,7 @@
    - cpu
    - text-to-speech
    - python
- &rerankers
  ### START rerankers
- &rerankers ### START rerankers
  url: "github:mudler/LocalAI/gallery/rerankers.yaml@master"
  name: cross-encoder
  parameters:
@ -6294,6 +6428,7 @@
- &gemma
  url: "github:mudler/LocalAI/gallery/gemma.yaml@master"
  name: "gemma-2b"
  icon: https://avatars.githubusercontent.com/u/1342004
  license: gemma
  urls:
    - https://ai.google.dev/gemma/docs
@ -7009,7 +7144,7 @@
      uri: huggingface://bartowski/GWQ-9B-Preview2-GGUF/GWQ-9B-Preview2-Q4_K_M.gguf
- &llama3
  url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
  icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
  icon: https://avatars.githubusercontent.com/u/153379578
  name: "llama3-8b-instruct"
  license: llama3
  description: |
@ -7176,10 +7311,9 @@
  name: "l3-8b-stheno-v3.1"
  urls:
    - https://huggingface.co/Sao10K/L3-8B-Stheno-v3.1
  icon: https://w.forfun.com/fetch/cb/cba2205390e517bea1ea60ca0b491af4.jpeg
  description: |
    - A model made for 1-on-1 Roleplay ideally, but one that is able to handle scenarios, RPGs and storywriting fine.
    - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
    - Uncensored during actual roleplay scenarios. # I do not care for zero-shot prompting like what some people do. It is uncensored enough in actual usecases.
    - I quite like the prose and style for this model.
  overrides:
    parameters:
@ -7970,7 +8104,6 @@
  urls:
    - https://huggingface.co/bartowski/New-Dawn-Llama-3-70B-32K-v1.0-GGUF
    - https://huggingface.co/sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0
  icon: https://imgur.com/tKzncGo.png
  description: |
    This model is a multi-level SLERP merge of several Llama 3 70B variants. See the merge recipe below for details. I extended the context window for this model out to 32K by snagging some layers from abacusai/Smaug-Llama-3-70B-Instruct-32K using a technique similar to what I used for Midnight Miqu, which was further honed by jukofyork.
    This model is uncensored. You are responsible for whatever you do with it.
@ -8322,7 +8455,8 @@
    - filename: dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
      sha256: 566331c2efe87725310aacb709ca15088a0063fa0ddc14a345bf20d69982156b
      uri: huggingface://bartowski/dolphin-2.9.2-Phi-3-Medium-abliterated-GGUF/dolphin-2.9.2-Phi-3-Medium-abliterated-Q4_K_M.gguf
- url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
- !!merge <<: *llama3
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  name: "llama-3-8b-instruct-dpo-v0.3-32k"
  license: llama3
  urls:
@ -8476,7 +8610,7 @@
  urls:
    - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat-GGUF
    - https://huggingface.co/arcee-ai/Llama-3-SEC-Chat
  icon: https://i.ibb.co/kHtBmDN/w8m6-X4-HCQRa-IR86ar-Cm5gg.webp
  icon: https://avatars.githubusercontent.com/u/126496414
  tags:
    - llama3
    - gguf
@ -8506,10 +8640,9 @@
    - filename: Copus-2x8B.i1-Q4_K_M.gguf
      sha256: 685da1ba49e203e8f491105585143d76044286d4b4687bed37d325f6b55501e5
      uri: huggingface://mradermacher/Copus-2x8B-i1-GGUF/Copus-2x8B.i1-Q4_K_M.gguf
- &yi-chat
  ### Start Yi
- &yi-chat ### Start Yi
  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
  icon: "https://raw.githubusercontent.com/01-ai/Yi/main/assets/img/Yi_logo_icon_light.svg"
  icon: "https://github.com/01-ai/Yi/raw/main/assets/img/Yi_logo_icon_light.svg"
  name: "yi-1.5-9b-chat"
  license: apache-2.0
  urls:
@ -8718,8 +8851,7 @@
    - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
      sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4
      uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf
- &noromaid
  ### Start noromaid
- &noromaid ### Start noromaid
  url: "github:mudler/LocalAI/gallery/noromaid.yaml@master"
  name: "noromaid-13b-0.4-DPO"
  icon: https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/VKX2Z2yjZX5J8kXzgeCYO.png
@ -8739,8 +8871,7 @@
    - filename: Noromaid-13B-0.4-DPO.q4_k_m.gguf
      sha256: cb28e878d034fae3d0b43326c5fc1cfb4ab583b17c56e41d6ce023caec03c1c1
      uri: huggingface://NeverSleep/Noromaid-13B-0.4-DPO-GGUF/Noromaid-13B-0.4-DPO.q4_k_m.gguf
- &wizardlm2
  ### START Vicuna based
- &wizardlm2 ### START Vicuna based
  url: "github:mudler/LocalAI/gallery/wizardlm2.yaml@master"
  name: "wizardlm2-7b"
  description: |
@ -8795,8 +8926,9 @@
    - filename: moondream2-mmproj-f16.gguf
      sha256: 4cc1cb3660d87ff56432ebeb7884ad35d67c48c7b9f6b2856f305e39c38eed8f
      uri: huggingface://moondream/moondream2-gguf/moondream2-mmproj-f16.gguf
- &llava
  ### START LLaVa
- &llava ### START LLaVa
  name: "llava-1.6-vicuna"
  icon: https://github.com/lobehub/lobe-icons/raw/master/packages/static-png/dark/llava-color.png
  url: "github:mudler/LocalAI/gallery/llava.yaml@master"
  license: apache-2.0
  description: |
@ -8810,7 +8942,6 @@
    - gpu
    - llama2
    - cpu
  name: "llava-1.6-vicuna"
  overrides:
    mmproj: mmproj-vicuna7b-f16.gguf
    parameters:
@ -9138,7 +9269,7 @@
  urls:
    - https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf
  description: |
    Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, MiniCPM and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.
    Bunny is a family of lightweight but powerful multimodal models. It offers multiple plug-and-play vision encoders, like EVA-CLIP, SigLIP and language backbones, including Llama-3-8B, Phi-1.5, StableLM-2, Qwen1.5, and Phi-2. To compensate for the decrease in model size, we construct more informative training data by curated selection from a broader data source.

    We provide Bunny-Llama-3-8B-V, which is built upon SigLIP and Llama-3-8B-Instruct. More details about this model can be found in GitHub.
  icon: https://huggingface.co/BAAI/Bunny-Llama-3-8B-V-gguf/resolve/main/icon.png
@ -9187,7 +9318,7 @@
      uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf
- !!merge <<: *llama3
  name: "minicpm-llama3-v-2_5"
  icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
  icon: https://avatars.githubusercontent.com/u/89920203
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5
|
||||
@ -9211,33 +9342,6 @@
|
||||
- filename: minicpm-llama3-mmproj-f16.gguf
|
||||
sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e
|
||||
uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "minicpm-v-2_6"
|
||||
license: apache-2.0
|
||||
icon: https://raw.githubusercontent.com/OpenBMB/MiniCPM/main/assets/minicpm_logo.png
|
||||
urls:
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf
|
||||
- https://huggingface.co/openbmb/MiniCPM-V-2_6
|
||||
description: |
|
||||
MiniCPM-V 2.6 is the latest and most capable model in the MiniCPM-V series. The model is built on SigLip-400M and Qwen2-7B with a total of 8B parameters
|
||||
tags:
|
||||
- llm
|
||||
- multimodal
|
||||
- gguf
|
||||
- gpu
|
||||
- llama3
|
||||
- cpu
|
||||
overrides:
|
||||
mmproj: minicpm-v-2_6-mmproj-f16.gguf
|
||||
parameters:
|
||||
model: minicpm-v-2_6-Q4_K_M.gguf
|
||||
files:
|
||||
- filename: minicpm-v-2_6-Q4_K_M.gguf
|
||||
sha256: 3a4078d53b46f22989adbf998ce5a3fd090b6541f112d7e936eb4204a04100b1
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/ggml-model-Q4_K_M.gguf
|
||||
- filename: minicpm-v-2_6-mmproj-f16.gguf
|
||||
sha256: f8a805e9e62085805c69c427287acefc284932eb4abfe6e1b1ce431d27e2f4e0
|
||||
uri: huggingface://openbmb/MiniCPM-V-2_6-gguf/mmproj-model-f16.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama-3-cursedstock-v1.8-8b-iq-imatrix"
|
||||
urls:
|
||||
@ -9305,7 +9409,6 @@
|
||||
June 18, 2024 Update, After extensive testing of the intermediate checkpoints, significant progress has been made.
|
||||
The model is slowly — I mean, really slowly — unlearning its alignment. By significantly lowering the learning rate, I was able to visibly observe deep behavioral changes, this process is taking longer than anticipated, but it's going to be worth it. Estimated time to completion: 4 more days.. I'm pleased to report that in several tests, the model not only maintained its intelligence but actually showed a slight improvement, especially in terms of common sense. An intermediate checkpoint of this model was used to create invisietch/EtherealRainbow-v0.3-rc7, with promising results. Currently, it seems like I'm on the right track. I hope this model will serve as a solid foundation for further merges, whether for role-playing (RP) or for uncensoring. This approach also allows us to save on actual fine-tuning, thereby reducing our carbon footprint. The merge process takes just a few minutes of CPU time, instead of days of GPU work.
|
||||
June 20, 2024 Update, Unaligning was partially successful, and the results are decent, but I am not fully satisfied. I decided to bite the bullet, and do a full finetune, god have mercy on my GPUs. I am also releasing the intermediate checkpoint of this model.
|
||||
icon: https://i.imgur.com/Kpk1PgZ.png
|
||||
overrides:
|
||||
parameters:
|
||||
model: LLAMA-3_8B_Unaligned_Alpha-Q4_K_M.gguf
|
||||
@ -9331,7 +9434,6 @@
|
||||
uri: huggingface://bartowski/L3-8B-Lunaris-v1-GGUF/L3-8B-Lunaris-v1-Q4_K_M.gguf
|
||||
- !!merge <<: *llama3
|
||||
name: "llama-3_8b_unaligned_alpha_rp_soup-i1"
|
||||
icon: https://i.imgur.com/pXcjpoV.png
|
||||
urls:
|
||||
- https://huggingface.co/SicariusSicariiStuff/LLAMA-3_8B_Unaligned_Alpha_RP_Soup
|
||||
- https://huggingface.co/mradermacher/LLAMA-3_8B_Unaligned_Alpha_RP_Soup-i1-GGUF
|
||||
@ -9681,8 +9783,7 @@
|
||||
- filename: Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
|
||||
sha256: cdc0f4de6df2ba120835fbd25c2a0ae2af8548f46d2c40c7a018c51c3d19e0c0
|
||||
uri: huggingface://mradermacher/Freyja-v4.95-maldv-7b-NON-FICTION-i1-GGUF/Freyja-v4.95-maldv-7b-NON-FICTION.i1-Q4_K_M.gguf
|
||||
- &chatml
|
||||
### ChatML
|
||||
- &chatml ### ChatML
|
||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||
name: "una-thepitbull-21.4b-v2"
|
||||
license: afl-3.0
|
||||
@ -9730,7 +9831,6 @@
|
||||
sha256: 9c90f3a65332a03a6cbb563eee19c7586d9544f646ff9f33f7f1904b3d415ae2
|
||||
uri: huggingface://nold/HelpingAI-9B-GGUF/HelpingAI-9B_Q4_K_M.gguf
|
||||
- url: "github:mudler/LocalAI/gallery/chatml-hercules.yaml@master"
|
||||
icon: "https://tse3.mm.bing.net/th/id/OIG1.vnrl3xpEcypR3McLW63q?pid=ImgGn"
|
||||
urls:
|
||||
- https://huggingface.co/Locutusque/Llama-3-Hercules-5.0-8B
|
||||
- https://huggingface.co/bartowski/Llama-3-Hercules-5.0-8B-GGUF
|
||||
@ -9968,8 +10068,7 @@
|
||||
- filename: Triangulum-10B.Q4_K_M.gguf
|
||||
sha256: dd071f99edf6b166044bf229cdeec19419c4c348e3fc3d6587cfcc55e6fb85fa
|
||||
uri: huggingface://mradermacher/Triangulum-10B-GGUF/Triangulum-10B.Q4_K_M.gguf
|
||||
- &command-R
|
||||
### START Command-r
|
||||
- &command-R ### START Command-r
|
||||
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
|
||||
name: "command-r-v01:q1_s"
|
||||
license: "cc-by-nc-4.0"
|
||||
@ -10024,8 +10123,7 @@
|
||||
- filename: "aya-23-35B-Q4_K_M.gguf"
|
||||
sha256: "57824768c1a945e21e028c8e9a29b39adb4838d489f5865c82601ab9ad98065d"
|
||||
uri: "huggingface://bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf"
|
||||
- &phi-2-chat
|
||||
### START Phi-2
|
||||
- &phi-2-chat ### START Phi-2
|
||||
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
|
||||
license: mit
|
||||
description: |
|
||||
@ -10054,6 +10152,7 @@
|
||||
- llama2
|
||||
- cpu
|
||||
name: "phi-2-chat:Q8_0"
|
||||
icon: https://avatars.githubusercontent.com/u/6154722
|
||||
overrides:
|
||||
parameters:
|
||||
model: phi-2-layla-v1-chatml-Q8_0.gguf
|
||||
@ -10100,7 +10199,7 @@
|
||||
urls:
|
||||
- https://huggingface.co/internlm/internlm2_5-7b-chat-1m
|
||||
- https://huggingface.co/bartowski/internlm2_5-7b-chat-1m-GGUF
|
||||
icon: https://github.com/InternLM/InternLM/assets/22529082/b9788105-8892-4398-8b47-b513a292378e
|
||||
icon: https://avatars.githubusercontent.com/u/135356492
|
||||
tags:
|
||||
- internlm2
|
||||
- gguf
|
||||
@ -10121,10 +10220,35 @@
|
||||
- filename: internlm2_5-7b-chat-1m-Q4_K_M.gguf
|
||||
uri: huggingface://bartowski/internlm2_5-7b-chat-1m-GGUF/internlm2_5-7b-chat-1m-Q4_K_M.gguf
|
||||
sha256: 10d5e18a4125f9d4d74a9284a21e0c820b150af06dee48665e54ff6e1be3a564
|
||||
- &phi-3
### START Phi-3
### Internlm3
- name: "internlm3-8b-instruct"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/internlm/internlm3-8b-instruct
- https://huggingface.co/bartowski/internlm3-8b-instruct-GGUF
icon: https://avatars.githubusercontent.com/u/135356492
tags:
- internlm3
- gguf
- cpu
- gpu
description: |
InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning. The model has the following characteristics:

Enhanced performance at reduced cost: State-of-the-art performance on reasoning and knowledge-intensive tasks surpasses models like Llama3.1-8B and Qwen2.5-7B.

Deep thinking capability: InternLM3 supports both the deep thinking mode for solving complicated reasoning tasks via the long chain-of-thought and the normal response mode for fluent user interactions.
overrides:
parameters:
model: internlm3-8b-instruct-Q4_K_M.gguf
files:
- filename: internlm3-8b-instruct-Q4_K_M.gguf
uri: huggingface://bartowski/internlm3-8b-instruct-GGUF/internlm3-8b-instruct-Q4_K_M.gguf
sha256: 2a9644687318e8659c9cf9b40730d5cc2f5af06f786a50439c7c51359b23896e
- &phi-3 ### START Phi-3
url: "github:mudler/LocalAI/gallery/phi-3-chat.yaml@master"
name: "phi-3-mini-4k-instruct"
icon: https://avatars.githubusercontent.com/u/6154722
license: mit
description: |
The Phi-3-Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Phi-3 family with the Mini version in two variants 4K and 128K which is the context length (in tokens) it can support. The model underwent a post-training process that incorporates both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures. When assessed against benchmarks testing common sense, language understanding, math, code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust and state-of-the-art performance among models with less than 13 billion parameters.
@ -10321,8 +10445,7 @@
- filename: Phi-3.5-MoE-instruct-Q4_K_M.gguf
sha256: 43e91bb720869bd8a92d8eb86bc3c74a52c49cf61642ca709b3d7bb89644df36
uri: huggingface://bartowski/Phi-3.5-MoE-instruct-GGUF/Phi-3.5-MoE-instruct-Q4_K_M.gguf
- &hermes-2-pro-mistral
### START Hermes
- &hermes-2-pro-mistral ### START Hermes
url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"
name: "hermes-2-pro-mistral"
icon: https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ggO2sBDJ8Bhc6w-zwTx5j.png
@ -10658,8 +10781,7 @@
- filename: "galatolo-Q4_K.gguf"
sha256: "ca0cfd5a9ad40dc16416aa3a277015d0299b62c0803b67f5709580042202c172"
uri: "huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q4_K.gguf"
- &codellama
### START Codellama
- &codellama ### START Codellama
url: "github:mudler/LocalAI/gallery/codellama.yaml@master"
name: "codellama-7b"
license: llama2
@ -10790,8 +10912,7 @@
- filename: "llm-compiler-7b-ftd.Q4_K.gguf"
uri: "huggingface://legraphista/llm-compiler-7b-ftd-IMat-GGUF/llm-compiler-7b-ftd.Q4_K.gguf"
sha256: d862dd18ed335413787d0ad196522a9902a3c10a6456afdab8721822cb0ddde8
- &openvino
### START OpenVINO
- &openvino ### START OpenVINO
url: "github:mudler/LocalAI/gallery/openvino.yaml@master"
name: "openvino-llama-3-8b-instruct-ov-int8"
license: llama3
@ -10905,8 +11026,7 @@
- gpu
- embedding
- cpu
- &sentencentransformers
### START Embeddings
- &sentencentransformers ### START Embeddings
description: |
This framework provides an easy method to compute dense vector representations for sentences, paragraphs, and images. The models are based on transformer networks like BERT / RoBERTa / XLM-RoBERTa etc. and achieve state-of-the-art performance in various tasks. Text is embedded in vector space such that similar texts are closer and can efficiently be found using cosine similarity.
urls:
@ -10921,8 +11041,7 @@
overrides:
parameters:
model: all-MiniLM-L6-v2
- &dreamshaper
### START Image generation
- &dreamshaper ### START Image generation
name: dreamshaper
icon: https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dd9b038c-bd15-43ab-86ab-66e145ad7ff2/width=450/26072158-132340247-8k%20portrait%20of%20beautiful%20cyborg%20with%20brown%20hair,%20intricate,%20elegant,%20highly%20detailed,%20majestic,%20digital%20photography,%20art%20by%20artg_ed.jpeg
license: other
@ -10959,6 +11078,92 @@
- sd-3
- gpu
url: "github:mudler/LocalAI/gallery/stablediffusion3.yaml@master"
- name: sd-1.5-ggml
license: creativeml-openrail-m
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
description: |
Stable Diffusion 1.5
urls:
- https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF
tags:
- text-to-image
- stablediffusion
- gpu
- cpu
overrides:
options:
- "sampler:euler"
parameters:
model: stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf
files:
- filename: "stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
sha256: "b8944e9fe0b69b36ae1b5bb0185b3a7b8ef14347fe0fa9af6c64c4829022261f"
uri: "huggingface://second-state/stable-diffusion-v1-5-GGUF/stable-diffusion-v1-5-pruned-emaonly-Q4_0.gguf"
- name: sd-3.5-medium-ggml
license: stabilityai-ai-community
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
description: |
Stable Diffusion 3.5 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
urls:
- https://huggingface.co/stabilityai/stable-diffusion-3.5-medium
- https://huggingface.co/second-state/stable-diffusion-3.5-medium-GGUF
tags:
- text-to-image
- stablediffusion
- gpu
- cpu
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-medium/media/main/sd3.5_medium_demo.jpg
overrides:
options:
- "clip_l_path:clip_l-Q4_0.gguf"
- "clip_g_path:clip_g-Q4_0.gguf"
- "t5xxl_path:t5xxl-Q4_0.gguf"
- "sampler:euler"
parameters:
model: sd3.5_medium-Q4_0.gguf
files:
- filename: "sd3.5_medium-Q4_0.gguf"
sha256: "3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf"
uri: "huggingface://second-state/stable-diffusion-3.5-medium-GGUF/sd3.5_medium-Q4_0.gguf"
- filename: clip_g-Q4_0.gguf
sha256: c142411147e16b7c4b9cc1f5d977cbe596104435d76fde47172d3d35c5e58bb8
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_g-Q4_0.gguf
- filename: clip_l-Q4_0.gguf
sha256: f5ad88ae2ac924eb4ac0298b77afa304b5e6014fc0c4128f0e3df40fdfcc0f8a
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/clip_l-Q4_0.gguf
- filename: t5xxl-Q4_0.gguf
sha256: 987ba47c158b890c274f78fd35324419f50941e846a49789f0977e9fe9d97ab7
uri: huggingface://second-state/stable-diffusion-3.5-medium-GGUF/t5xxl-Q4_0.gguf
- name: sd-3.5-large-ggml
license: stabilityai-ai-community
url: "github:mudler/LocalAI/gallery/sd-ggml.yaml@master"
description: |
Stable Diffusion 3.5 Large is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
urls:
- https://huggingface.co/stabilityai/stable-diffusion-3.5-large
- https://huggingface.co/second-state/stable-diffusion-3.5-large-GGUF
tags:
- text-to-image
- stablediffusion
- gpu
- cpu
icon: https://huggingface.co/stabilityai/stable-diffusion-3.5-large/media/main/sd3.5_large_demo.png
overrides:
parameters:
model: sd3.5_large-Q4_0.gguf
files:
- filename: "sd3.5_large-Q4_0.gguf"
sha256: "c79ed6cdaa7decaca6b05ccc636b956b37c47de9b104c56315ca8ed086347b00"
uri: "huggingface://second-state/stable-diffusion-3.5-large-GGUF/sd3.5_large-Q4_0.gguf"
- filename: clip_g.safetensors
sha256: ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_g.safetensors
- filename: clip_l.safetensors
sha256: 660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/clip_l.safetensors
- filename: t5xxl-Q5_0.gguf
sha256: f4df16c641a05c4a6ca717068ba3ee312875000f6fac0efbd152915553b5fc3e
uri: huggingface://second-state/stable-diffusion-3.5-large-GGUF/t5xxl-Q5_0.gguf
- &flux
name: flux.1-dev
license: flux-1-dev-non-commercial-license
@ -11034,8 +11239,7 @@
- filename: t5xxl_fp16.safetensors
sha256: 6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635
uri: https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors
- &whisper
## Whisper
- &whisper ## Whisper
url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master"
name: "whisper-1"
license: "MIT"
@ -11215,8 +11419,7 @@
description: |
Stable Diffusion in NCNN with c++, supported txt2img and img2img
name: stablediffusion-cpp
- &piper
## Piper TTS
- &piper ## Piper TTS
url: github:mudler/LocalAI/gallery/piper.yaml@master
name: voice-en-us-kathleen-low
icon: https://github.com/rhasspy/piper/raw/master/etc/logo.png
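Every gallery file entry above pairs a download uri with a pinned sha256, so a corrupted or tampered download can be rejected before the model is ever loaded. A minimal Go sketch of that verification step (illustrative only; verifyChecksum is not LocalAI's actual downloader helper):

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

// verifyChecksum streams a file through SHA-256 and compares the digest
// against the hex string pinned in the gallery entry.
func verifyChecksum(path, expected string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return err
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != expected {
		return fmt.Errorf("checksum mismatch: got %s, want %s", got, expected)
	}
	return nil
}

func main() {
	// e.g. the sd3.5_medium-Q4_0.gguf entry above
	err := verifyChecksum("sd3.5_medium-Q4_0.gguf",
		"3bb8c5e9ab0a841117089ed4ed81d885bb85161df2a766b812f829bc55b31adf")
	fmt.Println(err)
}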
12 gallery/sd-ggml.yaml Normal file
@ -0,0 +1,12 @@
---
name: "sd-ggml"

config_file: |
backend: stablediffusion-ggml
step: 25
cfg_scale: 4.5
options:
- "clip_l_path:clip_l.safetensors"
- "clip_g_path:clip_g.safetensors"
- "t5xxl_path:t5xxl-Q5_0.gguf"
- "sampler:euler"
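The options list in this template forwards backend settings as colon-separated key:value strings (for example "t5xxl_path:t5xxl-Q5_0.gguf"). A rough sketch of how such pairs can be split, assuming the first colon delimits the key (parseOptions is hypothetical, not the stablediffusion-ggml backend's actual parser):

package main

import (
	"fmt"
	"strings"
)

// parseOptions turns option strings like "sampler:euler" into a map,
// splitting on the first colon so values may themselves contain colons.
func parseOptions(opts []string) map[string]string {
	m := make(map[string]string, len(opts))
	for _, o := range opts {
		if k, v, ok := strings.Cut(o, ":"); ok {
			m[k] = v
		}
	}
	return m
}

func main() {
	opts := []string{"clip_l_path:clip_l.safetensors", "sampler:euler"}
	fmt.Println(parseOptions(opts)["sampler"]) // euler
}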
@ -34,7 +34,7 @@ type Tool struct {
}
type Tools []Tool

// ToJSONNameStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// ToJSONStructure converts a list of functions to a JSON structure that can be parsed to a grammar
// This allows the LLM to return a response of the type: { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
func (f Functions) ToJSONStructure(name, args string) JSONFunctionStructure {
nameKey := defaultFunctionNameKey
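The corrected doc comment names the shape the grammar constrains a model reply to: a JSON object carrying a function name and an arguments object. A small sketch of consuming that shape (the functionCall struct is invented for illustration and is not one of LocalAI's types):

package main

import (
	"encoding/json"
	"fmt"
)

// functionCall mirrors the documented response shape:
// { "name": "function_name", "arguments": { "arg1": "value1", ... } }
type functionCall struct {
	Name      string         `json:"name"`
	Arguments map[string]any `json:"arguments"`
}

func main() {
	raw := `{"name":"get_weather","arguments":{"city":"Rome"}}`
	var fc functionCall
	if err := json.Unmarshal([]byte(raw), &fc); err != nil {
		panic(err)
	}
	fmt.Println(fc.Name, fc.Arguments["city"]) // get_weather Rome
}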
@ -29,11 +29,14 @@ var Aliases map[string]string = map[string]string{
"langchain-huggingface": LCHuggingFaceBackend,
"transformers-musicgen": TransformersBackend,
"sentencetransformers": TransformersBackend,
"mamba": TransformersBackend,
"stablediffusion": StableDiffusionGGMLBackend,
}

var TypeAlias map[string]string = map[string]string{
"sentencetransformers": "SentenceTransformer",
"huggingface-embeddings": "SentenceTransformer",
"mamba": "Mamba",
"transformers-musicgen": "MusicgenForConditionalGeneration",
}

@ -54,10 +57,10 @@ const (

LLamaCPPGRPC = "llama-cpp-grpc"

WhisperBackend = "whisper"
StableDiffusionBackend = "stablediffusion"
PiperBackend = "piper"
LCHuggingFaceBackend = "huggingface"
WhisperBackend = "whisper"
StableDiffusionGGMLBackend = "stablediffusion-ggml"
PiperBackend = "piper"
LCHuggingFaceBackend = "huggingface"

TransformersBackend = "transformers"
LocalStoreBackend = "local-store"
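With the updated alias table, a model configured with backend "stablediffusion" now transparently resolves to the stablediffusion-ggml backend. A minimal sketch of the lookup pattern such a table implies (resolveBackend and the trimmed map are illustrative, not the project's actual helpers):

package main

import "fmt"

// A trimmed copy of the alias table, for illustration.
var aliases = map[string]string{
	"stablediffusion": "stablediffusion-ggml",
	"mamba":           "transformers",
}

// resolveBackend maps a user-facing backend name through the alias
// table, falling back to the name itself when no alias exists.
func resolveBackend(name string) string {
	if canonical, ok := aliases[name]; ok {
		return canonical
	}
	return name
}

func main() {
	fmt.Println(resolveBackend("stablediffusion")) // stablediffusion-ggml
	fmt.Println(resolveBackend("piper"))           // piper
}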
@ -1,35 +0,0 @@
//go:build stablediffusion
// +build stablediffusion

package stablediffusion

import (
stableDiffusion "github.com/mudler/go-stable-diffusion"
)

func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
if height > 512 || width > 512 {
return stableDiffusion.GenerateImageUpscaled(
height,
width,
step,
seed,
positive_prompt,
negative_prompt,
dst,
asset_dir,
)
}
return stableDiffusion.GenerateImage(
height,
width,
mode,
step,
seed,
positive_prompt,
negative_prompt,
dst,
"",
asset_dir,
)
}
@ -1,10 +0,0 @@
//go:build !stablediffusion
// +build !stablediffusion

package stablediffusion

import "fmt"

func GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst, asset_dir string) error {
return fmt.Errorf("This version of LocalAI was built without the stablediffusion tag")
}
@ -1,20 +0,0 @@
package stablediffusion

import "os"

type StableDiffusion struct {
assetDir string
}

func New(assetDir string) (*StableDiffusion, error) {
if _, err := os.Stat(assetDir); err != nil {
return nil, err
}
return &StableDiffusion{
assetDir: assetDir,
}, nil
}

func (s *StableDiffusion) GenerateImage(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string) error {
return GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst, s.assetDir)
}
@ -1645,6 +1645,9 @@ const docTemplate = `{
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
"quality": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
@ -1638,6 +1638,9 @@
"prompt": {
"description": "Prompt is read only by completion/image API calls"
},
"quality": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
@ -570,6 +570,8 @@ definitions:
type: number
prompt:
description: Prompt is read only by completion/image API calls
quality:
type: string
repeat_last_n:
type: integer
repeat_penalty:
@ -54,7 +54,7 @@ var _ = BeforeSuite(func() {
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "20m").ShouldNot(HaveOccurred())
}, "50m").ShouldNot(HaveOccurred())
})

var _ = AfterSuite(func() {
@ -123,8 +123,9 @@ var _ = Describe("E2E test", func() {
It("correctly", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Size: openai.CreateImageSize512x512,
Prompt: "test",
Quality: "1",
Size: openai.CreateImageSize256x256,
},
)
Expect(err).ToNot(HaveOccurred())
@ -135,7 +136,8 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Size: openai.CreateImageSize512x512,
Size: openai.CreateImageSize256x256,
Quality: "1",
ResponseFormat: openai.CreateImageResponseFormatURL,
},
)
@ -147,7 +149,8 @@ var _ = Describe("E2E test", func() {
resp, err := client.CreateImage(context.TODO(),
openai.ImageRequest{
Prompt: "test",
Size: openai.CreateImageSize512x512,
Size: openai.CreateImageSize256x256,
Quality: "1",
ResponseFormat: openai.CreateImageResponseFormatB64JSON,
},
)
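The tests now exercise both URL and b64_json response formats along with the new Quality field. A short sketch of consuming the b64_json variant, assuming the same sashabaranov/go-openai client these tests use (savePNG and the output path are illustrative):

package main

import (
	"encoding/base64"
	"os"
)

// savePNG decodes a b64_json image payload (resp.Data[0].B64JSON in the
// tests above) and writes the raw bytes to disk.
func savePNG(b64, path string) error {
	data, err := base64.StdEncoding.DecodeString(b64)
	if err != nil {
		return err
	}
	return os.WriteFile(path, data, 0o644)
}

func main() {
	_ = savePNG("aGVsbG8=", "out.bin") // placeholder payload, not a real image
}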
@ -4,6 +4,7 @@ import (
"context"
"embed"
"math"
"math/rand"
"os"
"path/filepath"

@ -22,6 +23,19 @@ import (
//go:embed backend-assets/*
var backendAssets embed.FS

func normalize(vecs [][]float32) {
for i, k := range vecs {
norm := float64(0)
for _, x := range k {
norm += float64(x * x)
}
norm = math.Sqrt(norm)
for j, x := range k {
vecs[i][j] = x / float32(norm)
}
}
}

var _ = Describe("Integration tests for the stores backend(s) and internal APIs", Label("stores"), func() {
Context("Embedded Store get,set and delete", func() {
var sl *model.ModelLoader
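The normalize helper factored out above rescales each vector to unit length, so the store's similarity scores can be read directly as cosines. A quick illustration using the helper (the 3-4-5 triangle makes the result easy to check by hand):

vecs := [][]float32{{3, 4}}
normalize(vecs)
// vecs[0] is now {0.6, 0.8}: each component divided by the norm 5.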
@ -192,17 +206,8 @@ var _ = Describe("Integration tests for the stores backend(s) and internal APIs"
// set 3 vectors that are at varying angles to {0.5, 0.5, 0.5}
keys := [][]float32{{0.1, 0.3, 0.5}, {0.5, 0.5, 0.5}, {0.6, 0.6, -0.6}, {0.7, -0.7, -0.7}}
vals := [][]byte{[]byte("test0"), []byte("test1"), []byte("test2"), []byte("test3")}
// normalize the keys
for i, k := range keys {
norm := float64(0)
for _, x := range k {
norm += float64(x * x)
}
norm = math.Sqrt(norm)
for j, x := range k {
keys[i][j] = x / float32(norm)
}
}

normalize(keys)

err := store.SetCols(context.Background(), sc, keys, vals)
Expect(err).ToNot(HaveOccurred())
@ -225,5 +230,121 @@
Expect(ks[1]).To(Equal(keys[1]))
Expect(vals[1]).To(Equal(vals[1]))
})

It("It produces the correct cosine similarities for orthogonal and opposite unit vectors", func() {
keys := [][]float32{{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}

err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())

_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
Expect(err).ToNot(HaveOccurred())
Expect(sims).To(Equal([]float32{1.0, 0.0, 0.0, -1.0}))
})

It("It produces the correct cosine similarities for orthogonal and opposite vectors", func() {
keys := [][]float32{{1.0, 0.0, 1.0}, {0.0, 2.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, -1.0}}
vals := [][]byte{[]byte("x"), []byte("y"), []byte("z"), []byte("-z")}

err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())

_, _, sims, err := store.Find(context.Background(), sc, keys[0], 4)
Expect(err).ToNot(HaveOccurred())
Expect(sims[0]).To(BeNumerically("~", 1, 0.1))
Expect(sims[1]).To(BeNumerically("~", 0, 0.1))
Expect(sims[2]).To(BeNumerically("~", -0.7, 0.1))
Expect(sims[3]).To(BeNumerically("~", -1, 0.1))
})
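The expected values in the non-unit-vector test follow from direct arithmetic: keys[0] = {1, 0, 1} has norm √2, so its cosine against {0, 0, -1} is -1/√2 ≈ -0.71, which is why sims[2] is expected near -0.7; {0, 2, 0} is orthogonal to keys[0] (similarity ≈ 0) and {-1, 0, -1} points exactly opposite (similarity ≈ -1).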
expectTriangleEq := func(keys [][]float32, vals [][]byte) {
sims := map[string]map[string]float32{}

// compare every key vector pair and store the similarities in a lookup table
// that uses the values as keys
for i, k := range keys {
_, valsk, simsk, err := store.Find(context.Background(), sc, k, 9)
Expect(err).ToNot(HaveOccurred())

for j, v := range valsk {
p := string(vals[i])
q := string(v)

if sims[p] == nil {
sims[p] = map[string]float32{}
}

//log.Debug().Strs("vals", []string{p, q}).Float32("similarity", simsk[j]).Send()

sims[p][q] = simsk[j]
}
}

// Check that the triangle inequality holds for every combination of the triplet
// u, v and w
for _, simsu := range sims {
for w, simw := range simsu {
// acos(u,w) <= ...
uws := math.Acos(float64(simw))

// ... acos(u,v) + acos(v,w)
for v, _ := range simsu {
uvws := math.Acos(float64(simsu[v])) + math.Acos(float64(sims[v][w]))

//log.Debug().Str("u", u).Str("v", v).Str("w", w).Send()
//log.Debug().Float32("uw", simw).Float32("uv", simsu[v]).Float32("vw", sims[v][w]).Send()
Expect(uws).To(BeNumerically("<=", uvws))
}
}
}
}

It("It obeys the triangle inequality for normalized values", func() {
keys := [][]float32{
{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0},
{-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0},
{2.0, 3.0, 4.0}, {9.0, 7.0, 1.0}, {0.0, -1.2, 2.3},
}
vals := [][]byte{
[]byte("x"), []byte("y"), []byte("z"),
[]byte("-x"), []byte("-y"), []byte("-z"),
[]byte("u"), []byte("v"), []byte("w"),
}

normalize(keys[6:])

err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())

expectTriangleEq(keys, vals)
})

It("It obeys the triangle inequality", func() {
rnd := rand.New(rand.NewSource(151))
keys := make([][]float32, 20)
vals := make([][]byte, 20)

for i := range keys {
k := make([]float32, 768)

for j := range k {
k[j] = rnd.Float32()
}

keys[i] = k
}

c := byte('a')
for i := range vals {
vals[i] = []byte{c}
c += 1
}

err := store.SetCols(context.Background(), sc, keys, vals);
Expect(err).ToNot(HaveOccurred())

expectTriangleEq(keys, vals)
})
})
})
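expectTriangleEq relies on angular distance, the acos of the cosine similarity, being a proper metric on the unit sphere, so d(u,w) <= d(u,v) + d(v,w) must hold for any triplet. A self-contained sketch of the same invariant, computing the cosines by hand instead of through the store:

package main

import (
	"fmt"
	"math"
)

// cosine returns the cosine similarity of two equal-length vectors.
func cosine(a, b []float32) float64 {
	var dot, na, nb float64
	for i := range a {
		dot += float64(a[i] * b[i])
		na += float64(a[i] * a[i])
		nb += float64(b[i] * b[i])
	}
	return dot / (math.Sqrt(na) * math.Sqrt(nb))
}

func main() {
	u := []float32{1, 0, 0}
	v := []float32{0.6, 0.8, 0}
	w := []float32{0, 0, 1}

	// Angular distances are acos of the pairwise cosine similarities.
	uw := math.Acos(cosine(u, w))
	uv := math.Acos(cosine(u, v))
	vw := math.Acos(cosine(v, w))

	// Triangle inequality on the sphere: d(u,w) <= d(u,v) + d(v,w).
	fmt.Println(uw <= uv+vw) // true
}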