mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-20 21:23:10 +00:00
Merge branch 'master' into fix-pr-folder-tasks
This commit is contained in:
commit
171c55d7d5
@ -9,6 +9,7 @@
|
|||||||
# Param 2: email
|
# Param 2: email
|
||||||
#
|
#
|
||||||
config_user() {
|
config_user() {
|
||||||
|
echo "Configuring git for $1 <$2>"
|
||||||
local gcn=$(git config --global user.name)
|
local gcn=$(git config --global user.name)
|
||||||
if [ -z "${gcn}" ]; then
|
if [ -z "${gcn}" ]; then
|
||||||
echo "Setting up git user / remote"
|
echo "Setting up git user / remote"
|
||||||
@ -24,6 +25,7 @@ config_user() {
|
|||||||
# Param 2: remote url
|
# Param 2: remote url
|
||||||
#
|
#
|
||||||
config_remote() {
|
config_remote() {
|
||||||
|
echo "Adding git remote and fetching $2 as $1"
|
||||||
local gr=$(git remote -v | grep $1)
|
local gr=$(git remote -v | grep $1)
|
||||||
if [ -z "${gr}" ]; then
|
if [ -z "${gr}" ]; then
|
||||||
git remote add $1 $2
|
git remote add $1 $2
|
||||||
|
5
.github/check_and_update.py
vendored
5
.github/check_and_update.py
vendored
@ -29,8 +29,13 @@ def calculate_sha256(file_path):
|
|||||||
def manual_safety_check_hf(repo_id):
|
def manual_safety_check_hf(repo_id):
|
||||||
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
|
||||||
scan = scanResponse.json()
|
scan = scanResponse.json()
|
||||||
|
# Check if 'hasUnsafeFile' exists in the response
|
||||||
|
if 'hasUnsafeFile' in scan:
|
||||||
if scan['hasUnsafeFile']:
|
if scan['hasUnsafeFile']:
|
||||||
return scan
|
return scan
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
download_type, repo_id_or_url = parse_uri(uri)
|
download_type, repo_id_or_url = parse_uri(uri)
|
||||||
|
117
.github/workflows/image.yml
vendored
117
.github/workflows/image.yml
vendored
@ -13,6 +13,78 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
hipblas-jobs:
|
||||||
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
with:
|
||||||
|
tag-latest: ${{ matrix.tag-latest }}
|
||||||
|
tag-suffix: ${{ matrix.tag-suffix }}
|
||||||
|
ffmpeg: ${{ matrix.ffmpeg }}
|
||||||
|
image-type: ${{ matrix.image-type }}
|
||||||
|
build-type: ${{ matrix.build-type }}
|
||||||
|
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||||
|
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||||
|
platforms: ${{ matrix.platforms }}
|
||||||
|
runs-on: ${{ matrix.runs-on }}
|
||||||
|
base-image: ${{ matrix.base-image }}
|
||||||
|
grpc-base-image: ${{ matrix.grpc-base-image }}
|
||||||
|
aio: ${{ matrix.aio }}
|
||||||
|
makeflags: ${{ matrix.makeflags }}
|
||||||
|
latest-image: ${{ matrix.latest-image }}
|
||||||
|
latest-image-aio: ${{ matrix.latest-image-aio }}
|
||||||
|
secrets:
|
||||||
|
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||||
|
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||||
|
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||||
|
strategy:
|
||||||
|
# Pushing with all jobs in parallel
|
||||||
|
# eats the bandwidth of all the nodes
|
||||||
|
max-parallel: 2
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'auto'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'extras'
|
||||||
|
aio: "-aio-gpu-hipblas"
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
latest-image: 'latest-gpu-hipblas'
|
||||||
|
latest-image-aio: 'latest-aio-gpu-hipblas'
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'extras'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-ffmpeg-core'
|
||||||
|
ffmpeg: 'true'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
|
- build-type: 'hipblas'
|
||||||
|
platforms: 'linux/amd64'
|
||||||
|
tag-latest: 'false'
|
||||||
|
tag-suffix: '-hipblas-core'
|
||||||
|
ffmpeg: 'false'
|
||||||
|
image-type: 'core'
|
||||||
|
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
||||||
|
grpc-base-image: "ubuntu:22.04"
|
||||||
|
runs-on: 'arc-runner-set'
|
||||||
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
self-hosted-jobs:
|
self-hosted-jobs:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
with:
|
with:
|
||||||
@ -39,7 +111,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
# Pushing with all jobs in parallel
|
# Pushing with all jobs in parallel
|
||||||
# eats the bandwidth of all the nodes
|
# eats the bandwidth of all the nodes
|
||||||
max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
|
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }}
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Extra images
|
# Extra images
|
||||||
@ -122,29 +194,6 @@ jobs:
|
|||||||
base-image: "ubuntu:22.04"
|
base-image: "ubuntu:22.04"
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'auto'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'extras'
|
|
||||||
aio: "-aio-gpu-hipblas"
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
latest-image: 'latest-gpu-hipblas'
|
|
||||||
latest-image-aio: 'latest-aio-gpu-hipblas'
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'extras'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'sycl_f16'
|
- build-type: 'sycl_f16'
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
tag-latest: 'auto'
|
tag-latest: 'auto'
|
||||||
@ -212,26 +261,6 @@ jobs:
|
|||||||
image-type: 'core'
|
image-type: 'core'
|
||||||
runs-on: 'arc-runner-set'
|
runs-on: 'arc-runner-set'
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
makeflags: "--jobs=3 --output-sync=target"
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-ffmpeg-core'
|
|
||||||
ffmpeg: 'true'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
- build-type: 'hipblas'
|
|
||||||
platforms: 'linux/amd64'
|
|
||||||
tag-latest: 'false'
|
|
||||||
tag-suffix: '-hipblas-core'
|
|
||||||
ffmpeg: 'false'
|
|
||||||
image-type: 'core'
|
|
||||||
base-image: "rocm/dev-ubuntu-22.04:6.1"
|
|
||||||
grpc-base-image: "ubuntu:22.04"
|
|
||||||
runs-on: 'arc-runner-set'
|
|
||||||
makeflags: "--jobs=3 --output-sync=target"
|
|
||||||
|
|
||||||
core-image-build:
|
core-image-build:
|
||||||
uses: ./.github/workflows/image_build.yml
|
uses: ./.github/workflows/image_build.yml
|
||||||
|
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.21.0
|
uses: securego/gosec@v2.21.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
@ -189,13 +189,22 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
# Install protoc
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
- name: Build images
|
||||||
run: |
|
run: |
|
||||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
|
13
Dockerfile
13
Dockerfile
@ -297,10 +297,10 @@ COPY .git .
|
|||||||
RUN make prepare
|
RUN make prepare
|
||||||
|
|
||||||
## Build the binary
|
## Build the binary
|
||||||
## If it's CUDA, we want to skip some of the llama-compat backends to save space
|
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space
|
||||||
## We only leave the most CPU-optimized variant and the fallback for the cublas build
|
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build
|
||||||
## (both will use CUDA for the actual computation)
|
## (both will use CUDA or hipblas for the actual computation)
|
||||||
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
|
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
|
||||||
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
|
||||||
else \
|
else \
|
||||||
make build; \
|
make build; \
|
||||||
@ -338,9 +338,8 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
|
|||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
ssh less && \
|
ssh less wget
|
||||||
apt-get clean && \
|
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime.
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
RUN go install github.com/go-delve/delve/cmd/dlv@latest
|
||||||
|
|
||||||
|
9
Makefile
9
Makefile
@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=6262d13e0b2da91f230129a93a996609a2f5a2f2
|
CPPLLAMA_VERSION?=6f1d9d71f4c568778a7637ff6582e6f6ba5fb9d3
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=049b3a0e53c8a8e4c4576c06a1a4fccf0063a73f
|
WHISPER_CPP_VERSION?=8feb375fbdf0277ad36958c218c6bf48fa0ba75a
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
@ -359,6 +359,9 @@ clean-tests:
|
|||||||
rm -rf test-dir
|
rm -rf test-dir
|
||||||
rm -rf core/http/backend-assets
|
rm -rf core/http/backend-assets
|
||||||
|
|
||||||
|
clean-dc: clean
|
||||||
|
cp -r /build/backend-assets /workspace/backend-assets
|
||||||
|
|
||||||
## Build:
|
## Build:
|
||||||
build: prepare backend-assets grpcs ## Build the project
|
build: prepare backend-assets grpcs ## Build the project
|
||||||
$(info ${GREEN}I local-ai build info:${RESET})
|
$(info ${GREEN}I local-ai build info:${RESET})
|
||||||
@ -465,7 +468,7 @@ run-e2e-image:
|
|||||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
run-e2e-aio:
|
run-e2e-aio: protogen-go
|
||||||
@echo 'Running e2e AIO tests'
|
@echo 'Running e2e AIO tests'
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
f16: true
|
f16: true
|
||||||
mmap: true
|
mmap: true
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
@ -2,7 +2,7 @@ backend: llama-cpp
|
|||||||
context_size: 4096
|
context_size: 4096
|
||||||
mmap: false
|
mmap: false
|
||||||
f16: false
|
f16: false
|
||||||
name: gpt-4-vision-preview
|
name: gpt-4o
|
||||||
|
|
||||||
roles:
|
roles:
|
||||||
user: "USER:"
|
user: "USER:"
|
||||||
|
@ -134,6 +134,9 @@ message PredictOptions {
|
|||||||
repeated string Images = 42;
|
repeated string Images = 42;
|
||||||
bool UseTokenizerTemplate = 43;
|
bool UseTokenizerTemplate = 43;
|
||||||
repeated Message Messages = 44;
|
repeated Message Messages = 44;
|
||||||
|
repeated string Videos = 45;
|
||||||
|
repeated string Audios = 46;
|
||||||
|
string CorrelationId = 47;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
|
@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
data["embeddings"] = predict->embeddings();
|
data["embeddings"] = predict->embeddings();
|
||||||
|
|
||||||
|
// Add the correlationid to json data
|
||||||
|
data["correlation_id"] = predict->correlationid();
|
||||||
|
|
||||||
// for each image in the request, add the image data
|
// for each image in the request, add the image data
|
||||||
//
|
//
|
||||||
for (int i = 0; i < predict->images_size(); i++) {
|
for (int i = 0; i < predict->images_size(); i++) {
|
||||||
@ -2344,6 +2347,11 @@ public:
|
|||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
|
|
||||||
@ -2367,6 +2375,12 @@ public:
|
|||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
|
@ -2,4 +2,4 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
@ -1,2 +1,2 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
@ -1,4 +1,4 @@
|
|||||||
TTS==0.22.0
|
coqui-tts
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
@ -3,7 +3,7 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchvision
|
torchvision
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
diffusers
|
diffusers
|
||||||
opencv-python
|
opencv-python
|
||||||
transformers
|
transformers
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
@ -2,7 +2,7 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
@ -18,6 +18,6 @@ python-dotenv
|
|||||||
pypinyin==0.50.0
|
pypinyin==0.50.0
|
||||||
cn2an==0.5.22
|
cn2an==0.5.22
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
gradio==4.38.1
|
gradio==4.44.1
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
@ -15,5 +15,12 @@ installRequirements
|
|||||||
|
|
||||||
# https://github.com/descriptinc/audiotools/issues/101
|
# https://github.com/descriptinc/audiotools/issues/101
|
||||||
# incompatible protobuf versions.
|
# incompatible protobuf versions.
|
||||||
PYDIR=$(ls ${MY_DIR}/venv/lib)
|
PYDIR=python3.10
|
||||||
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
|
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/"
|
||||||
|
|
||||||
|
if [ ! -d ${pyenv} ]; then
|
||||||
|
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py
|
||||||
|
@ -3,6 +3,6 @@ intel-extension-for-pytorch
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
transformers
|
transformers
|
||||||
accelerate
|
accelerate
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
@ -5,4 +5,4 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
rerankers[transformers]
|
rerankers[transformers]
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
@ -55,7 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
"""
|
"""
|
||||||
model_name = request.Model
|
model_name = request.Model
|
||||||
try:
|
try:
|
||||||
self.model = SentenceTransformer(model_name)
|
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
|
@ -2,5 +2,5 @@ torch
|
|||||||
accelerate
|
accelerate
|
||||||
transformers
|
transformers
|
||||||
bitsandbytes
|
bitsandbytes
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
@ -1,4 +1,4 @@
|
|||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
@ -1,5 +1,5 @@
|
|||||||
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
--extra-index-url https://download.pytorch.org/whl/rocm6.0
|
||||||
torch
|
torch
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
@ -4,5 +4,5 @@ torch
|
|||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
accelerate
|
accelerate
|
||||||
sentence-transformers==3.0.1
|
sentence-transformers==3.1.1
|
||||||
transformers
|
transformers
|
@ -1,3 +1,5 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
datasets
|
||||||
|
einops
|
@ -4,4 +4,4 @@ transformers
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
scipy==1.14.0
|
scipy==1.14.0
|
||||||
certifi
|
certifi
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
@ -4,4 +4,4 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
torchaudio
|
torchaudio
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
@ -4,4 +4,4 @@ accelerate
|
|||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
@ -12,7 +12,7 @@ import (
|
|||||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||||
modelFile := backendConfig.Model
|
modelFile := backendConfig.Model
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
grpcOpts := GRPCModelOpts(backendConfig)
|
||||||
|
|
||||||
var inferenceModel interface{}
|
var inferenceModel interface{}
|
||||||
var err error
|
var err error
|
||||||
|
@ -12,7 +12,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
|
|||||||
if *threads == 0 && appConfig.Threads != 0 {
|
if *threads == 0 && appConfig.Threads != 0 {
|
||||||
threads = &appConfig.Threads
|
threads = &appConfig.Threads
|
||||||
}
|
}
|
||||||
gRPCOpts := gRPCModelOpts(backendConfig)
|
gRPCOpts := GRPCModelOpts(backendConfig)
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
||||||
model.WithBackendString(backendConfig.Backend),
|
model.WithBackendString(backendConfig.Backend),
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
model.WithAssetDir(appConfig.AssetsDestination),
|
||||||
|
@ -31,13 +31,13 @@ type TokenUsage struct {
|
|||||||
Completion int
|
Completion int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
threads := c.Threads
|
threads := c.Threads
|
||||||
if *threads == 0 && o.Threads != 0 {
|
if *threads == 0 && o.Threads != 0 {
|
||||||
threads = &o.Threads
|
threads = &o.Threads
|
||||||
}
|
}
|
||||||
grpcOpts := gRPCModelOpts(c)
|
grpcOpts := GRPCModelOpts(c)
|
||||||
|
|
||||||
var inferenceModel grpc.Backend
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
@ -101,6 +101,8 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
|
|||||||
opts.Messages = protoMessages
|
opts.Messages = protoMessages
|
||||||
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
|
||||||
opts.Images = images
|
opts.Images = images
|
||||||
|
opts.Videos = videos
|
||||||
|
opts.Audios = audios
|
||||||
|
|
||||||
tokenUsage := TokenUsage{}
|
tokenUsage := TokenUsage{}
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ func getSeed(c config.BackendConfig) int32 {
|
|||||||
return seed
|
return seed
|
||||||
}
|
}
|
||||||
|
|
||||||
func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||||
b := 512
|
b := 512
|
||||||
if c.Batch != 0 {
|
if c.Batch != 0 {
|
||||||
b = c.Batch
|
b = c.Batch
|
||||||
|
@ -15,7 +15,7 @@ func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *mod
|
|||||||
return nil, fmt.Errorf("backend is required")
|
return nil, fmt.Errorf("backend is required")
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
grpcOpts := GRPCModelOpts(backendConfig)
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
model.WithBackendString(bb),
|
model.WithBackendString(bb),
|
||||||
|
@ -29,7 +29,7 @@ func SoundGeneration(
|
|||||||
return "", nil, fmt.Errorf("backend is a required parameter")
|
return "", nil, fmt.Errorf("backend is a required parameter")
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
grpcOpts := GRPCModelOpts(backendConfig)
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
model.WithBackendString(backend),
|
model.WithBackendString(backend),
|
||||||
model.WithModel(modelFile),
|
model.WithModel(modelFile),
|
||||||
|
@ -28,7 +28,7 @@ func ModelTTS(
|
|||||||
bb = model.PiperBackend
|
bb = model.PiperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcOpts := gRPCModelOpts(backendConfig)
|
grpcOpts := GRPCModelOpts(backendConfig)
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
model.WithBackendString(bb),
|
model.WithBackendString(bb),
|
||||||
|
@ -51,6 +51,9 @@ type RunCMD struct {
|
|||||||
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
|
||||||
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
|
||||||
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
|
||||||
|
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"`
|
||||||
|
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"`
|
||||||
|
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"`
|
||||||
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
|
||||||
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
|
||||||
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
|
||||||
@ -66,6 +69,7 @@ type RunCMD struct {
|
|||||||
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
|
||||||
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
|
||||||
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
|
||||||
|
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
||||||
@ -97,7 +101,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
|
|||||||
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
|
config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
|
||||||
config.WithOpaqueErrors(r.OpaqueErrors),
|
config.WithOpaqueErrors(r.OpaqueErrors),
|
||||||
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
|
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
|
||||||
|
config.WithSubtleKeyComparison(r.UseSubtleKeyComparison),
|
||||||
|
config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
|
||||||
|
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
|
||||||
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
|
config.WithP2PNetworkID(r.Peer2PeerNetworkID),
|
||||||
|
config.WithLoadToMemory(r.LoadToMemory),
|
||||||
}
|
}
|
||||||
|
|
||||||
token := ""
|
token := ""
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"embed"
|
"embed"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"regexp"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
"github.com/mudler/LocalAI/pkg/xsysinfo"
|
||||||
@ -16,7 +17,6 @@ type ApplicationConfig struct {
|
|||||||
ModelPath string
|
ModelPath string
|
||||||
LibPath string
|
LibPath string
|
||||||
UploadLimitMB, Threads, ContextSize int
|
UploadLimitMB, Threads, ContextSize int
|
||||||
DisableWebUI bool
|
|
||||||
F16 bool
|
F16 bool
|
||||||
Debug bool
|
Debug bool
|
||||||
ImageDir string
|
ImageDir string
|
||||||
@ -31,11 +31,18 @@ type ApplicationConfig struct {
|
|||||||
PreloadModelsFromPath string
|
PreloadModelsFromPath string
|
||||||
CORSAllowOrigins string
|
CORSAllowOrigins string
|
||||||
ApiKeys []string
|
ApiKeys []string
|
||||||
EnforcePredownloadScans bool
|
|
||||||
OpaqueErrors bool
|
|
||||||
P2PToken string
|
P2PToken string
|
||||||
P2PNetworkID string
|
P2PNetworkID string
|
||||||
|
|
||||||
|
DisableWebUI bool
|
||||||
|
EnforcePredownloadScans bool
|
||||||
|
OpaqueErrors bool
|
||||||
|
UseSubtleKeyComparison bool
|
||||||
|
DisableApiKeyRequirementForHttpGet bool
|
||||||
|
HttpGetExemptedEndpoints []*regexp.Regexp
|
||||||
|
DisableGalleryEndpoint bool
|
||||||
|
LoadToMemory []string
|
||||||
|
|
||||||
ModelLibraryURL string
|
ModelLibraryURL string
|
||||||
|
|
||||||
Galleries []Gallery
|
Galleries []Gallery
|
||||||
@ -57,8 +64,6 @@ type ApplicationConfig struct {
|
|||||||
ModelsURL []string
|
ModelsURL []string
|
||||||
|
|
||||||
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
|
||||||
|
|
||||||
DisableGalleryEndpoint bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type AppOption func(*ApplicationConfig)
|
type AppOption func(*ApplicationConfig)
|
||||||
@ -327,6 +332,38 @@ func WithOpaqueErrors(opaque bool) AppOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithLoadToMemory(models []string) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.LoadToMemory = models
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithSubtleKeyComparison(subtle bool) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.UseSubtleKeyComparison = subtle
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.DisableApiKeyRequirementForHttpGet = required
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
|
||||||
|
return func(o *ApplicationConfig) {
|
||||||
|
o.HttpGetExemptedEndpoints = []*regexp.Regexp{}
|
||||||
|
for _, epr := range endpoints {
|
||||||
|
r, err := regexp.Compile(epr)
|
||||||
|
if err == nil && r != nil {
|
||||||
|
o.HttpGetExemptedEndpoints = append(o.HttpGetExemptedEndpoints, r)
|
||||||
|
} else {
|
||||||
|
log.Warn().Err(err).Str("regex", epr).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
|
// ToConfigLoaderOptions returns a slice of ConfigLoader Option.
|
||||||
// Some options defined at the application level are going to be passed as defaults for
|
// Some options defined at the application level are going to be passed as defaults for
|
||||||
// all the configuration for the models.
|
// all the configuration for the models.
|
||||||
|
@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
|
|||||||
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
|
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
|
||||||
var refFile string
|
var refFile string
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
refFile = string(d)
|
refFile = string(d)
|
||||||
if len(refFile) == 0 {
|
if len(refFile) == 0 {
|
||||||
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
||||||
@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
|
|||||||
}
|
}
|
||||||
uri := downloader.URI(gallery.URL)
|
uri := downloader.URI(gallery.URL)
|
||||||
|
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
return yaml.Unmarshal(d, &models)
|
return yaml.Unmarshal(d, &models)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -69,7 +69,7 @@ type PromptTemplate struct {
|
|||||||
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
|
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
|
||||||
var config Config
|
var config Config
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
|
||||||
return yaml.Unmarshal(d, &config)
|
return yaml.Unmarshal(d, &config)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -3,13 +3,15 @@ package http
|
|||||||
import (
|
import (
|
||||||
"embed"
|
"embed"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
|
||||||
|
|
||||||
|
"github.com/dave-gray101/v2keyauth"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
"github.com/mudler/LocalAI/core/http/endpoints/localai"
|
||||||
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
"github.com/mudler/LocalAI/core/http/endpoints/openai"
|
||||||
|
"github.com/mudler/LocalAI/core/http/middleware"
|
||||||
"github.com/mudler/LocalAI/core/http/routes"
|
"github.com/mudler/LocalAI/core/http/routes"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
@ -29,24 +31,6 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func readAuthHeader(c *fiber.Ctx) string {
|
|
||||||
authHeader := c.Get("Authorization")
|
|
||||||
|
|
||||||
// elevenlabs
|
|
||||||
xApiKey := c.Get("xi-api-key")
|
|
||||||
if xApiKey != "" {
|
|
||||||
authHeader = "Bearer " + xApiKey
|
|
||||||
}
|
|
||||||
|
|
||||||
// anthropic
|
|
||||||
xApiKey = c.Get("x-api-key")
|
|
||||||
if xApiKey != "" {
|
|
||||||
authHeader = "Bearer " + xApiKey
|
|
||||||
}
|
|
||||||
|
|
||||||
return authHeader
|
|
||||||
}
|
|
||||||
|
|
||||||
// Embed a directory
|
// Embed a directory
|
||||||
//
|
//
|
||||||
//go:embed static/*
|
//go:embed static/*
|
||||||
@ -137,36 +121,16 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Auth middleware checking if API key is valid. If no API key is set, no auth is required.
|
// Health Checks should always be exempt from auth, so register these first
|
||||||
auth := func(c *fiber.Ctx) error {
|
routes.HealthRoutes(app)
|
||||||
if len(appConfig.ApiKeys) == 0 {
|
|
||||||
return c.Next()
|
kaConfig, err := middleware.GetKeyAuthConfig(appConfig)
|
||||||
|
if err != nil || kaConfig == nil {
|
||||||
|
return nil, fmt.Errorf("failed to create key auth config: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(appConfig.ApiKeys) == 0 {
|
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
|
||||||
return c.Next()
|
app.Use(v2keyauth.New(*kaConfig))
|
||||||
}
|
|
||||||
|
|
||||||
authHeader := readAuthHeader(c)
|
|
||||||
if authHeader == "" {
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
|
|
||||||
}
|
|
||||||
|
|
||||||
// If it's a bearer token
|
|
||||||
authHeaderParts := strings.Split(authHeader, " ")
|
|
||||||
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
|
|
||||||
}
|
|
||||||
|
|
||||||
apiKey := authHeaderParts[1]
|
|
||||||
for _, key := range appConfig.ApiKeys {
|
|
||||||
if apiKey == key {
|
|
||||||
return c.Next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
|
|
||||||
}
|
|
||||||
|
|
||||||
if appConfig.CORS {
|
if appConfig.CORS {
|
||||||
var c func(ctx *fiber.Ctx) error
|
var c func(ctx *fiber.Ctx) error
|
||||||
@ -192,13 +156,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
|
|||||||
galleryService := services.NewGalleryService(appConfig)
|
galleryService := services.NewGalleryService(appConfig)
|
||||||
galleryService.Start(appConfig.Context, cl)
|
galleryService.Start(appConfig.Context, cl)
|
||||||
|
|
||||||
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig)
|
||||||
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService)
|
||||||
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig)
|
||||||
if !appConfig.DisableWebUI {
|
if !appConfig.DisableWebUI {
|
||||||
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
|
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService)
|
||||||
}
|
}
|
||||||
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
|
routes.RegisterJINARoutes(app, cl, ml, appConfig)
|
||||||
|
|
||||||
httpFS := http.FS(embedDirStatic)
|
httpFS := http.FS(embedDirStatic)
|
||||||
|
|
||||||
|
@ -31,6 +31,9 @@ import (
|
|||||||
"github.com/sashabaranov/go-openai/jsonschema"
|
"github.com/sashabaranov/go-openai/jsonschema"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const apiKey = "joshua"
|
||||||
|
const bearerKey = "Bearer " + apiKey
|
||||||
|
|
||||||
const testPrompt = `### System:
|
const testPrompt = `### System:
|
||||||
You are an AI assistant that follows instruction extremely well. Help as much as you can.
|
You are an AI assistant that follows instruction extremely well. Help as much as you can.
|
||||||
|
|
||||||
@ -50,11 +53,19 @@ type modelApplyRequest struct {
|
|||||||
|
|
||||||
func getModelStatus(url string) (response map[string]interface{}) {
|
func getModelStatus(url string) (response map[string]interface{}) {
|
||||||
// Create the HTTP request
|
// Create the HTTP request
|
||||||
resp, err := http.Get(url)
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("Error creating request:", err)
|
fmt.Println("Error creating request:", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
client := &http.Client{}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error sending request:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(resp.Body)
|
||||||
@ -72,14 +83,15 @@ func getModelStatus(url string) (response map[string]interface{}) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func getModels(url string) (response []gallery.GalleryModel) {
|
func getModels(url string) ([]gallery.GalleryModel, error) {
|
||||||
|
response := []gallery.GalleryModel{}
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
// TODO: No tests currently seem to exercise file:// urls. Fix?
|
// TODO: No tests currently seem to exercise file:// urls. Fix?
|
||||||
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
|
err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error {
|
||||||
// Unmarshal YAML data into a struct
|
// Unmarshal YAML data into a struct
|
||||||
return json.Unmarshal(i, &response)
|
return json.Unmarshal(i, &response)
|
||||||
})
|
})
|
||||||
return
|
return response, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
|
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
|
||||||
@ -101,6 +113,7 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
// Make the request
|
// Make the request
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
@ -140,6 +153,7 @@ func postRequestJSON[B any](url string, bodyJson *B) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
@ -175,6 +189,7 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
|
|||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Authorization", bearerKey)
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
@ -195,6 +210,35 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
|
|||||||
return json.Unmarshal(body, respJson)
|
return json.Unmarshal(body, respJson)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// postInvalidRequest POSTs the literal body "invalid request" to url with a
// JSON Content-Type and no Authorization header.
//
// It returns (nil, status) when the response status is in [200, 400), and an
// error describing the status and body otherwise. If the request cannot be
// built or sent, or the body cannot be read, the status is -1.
//
// The (error, int) result order is kept as-is because existing callers
// destructure it in that order.
func postInvalidRequest(url string) (error, int) {
	payload := bytes.NewBufferString("invalid request")

	// http.Post builds the request, sets Content-Type and sends it with the
	// default client.
	resp, err := http.Post(url, "application/json", payload)
	if err != nil {
		return err, -1
	}
	defer resp.Body.Close()

	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return err, -1
	}

	status := resp.StatusCode
	if status >= 200 && status < 400 {
		return nil, status
	}

	return fmt.Errorf("unexpected status code: %d, body: %s", status, string(raw)), status
}
|
||||||
|
|
||||||
//go:embed backend-assets/*
|
//go:embed backend-assets/*
|
||||||
var backendAssets embed.FS
|
var backendAssets embed.FS
|
||||||
|
|
||||||
@ -260,6 +304,7 @@ var _ = Describe("API test", func() {
|
|||||||
config.WithContext(c),
|
config.WithContext(c),
|
||||||
config.WithGalleries(galleries),
|
config.WithGalleries(galleries),
|
||||||
config.WithModelPath(modelDir),
|
config.WithModelPath(modelDir),
|
||||||
|
config.WithApiKeys([]string{apiKey}),
|
||||||
config.WithBackendAssets(backendAssets),
|
config.WithBackendAssets(backendAssets),
|
||||||
config.WithBackendAssetsOutput(backendAssetsDir))...)
|
config.WithBackendAssetsOutput(backendAssetsDir))...)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
@ -269,7 +314,7 @@ var _ = Describe("API test", func() {
|
|||||||
|
|
||||||
go app.Listen("127.0.0.1:9090")
|
go app.Listen("127.0.0.1:9090")
|
||||||
|
|
||||||
defaultConfig := openai.DefaultConfig("")
|
defaultConfig := openai.DefaultConfig(apiKey)
|
||||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||||
|
|
||||||
client2 = openaigo.NewClient("")
|
client2 = openaigo.NewClient("")
|
||||||
@ -295,10 +340,19 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(err).To(HaveOccurred())
|
Expect(err).To(HaveOccurred())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Context("Auth Tests", func() {
|
||||||
|
It("Should fail if the api key is missing", func() {
|
||||||
|
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).ToNot(BeNil())
|
||||||
|
Expect(sc).To(Equal(403))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Context("Applying models", func() {
|
Context("Applying models", func() {
|
||||||
|
|
||||||
It("applies models from a gallery", func() {
|
It("applies models from a gallery", func() {
|
||||||
models := getModels("http://127.0.0.1:9090/models/available")
|
models, err := getModels("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).To(BeNil())
|
||||||
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
||||||
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
|
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
|
||||||
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
|
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
|
||||||
@ -331,7 +385,8 @@ var _ = Describe("API test", func() {
|
|||||||
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
Expect(content["backend"]).To(Equal("bert-embeddings"))
|
||||||
Expect(content["foo"]).To(Equal("bar"))
|
Expect(content["foo"]).To(Equal("bar"))
|
||||||
|
|
||||||
models = getModels("http://127.0.0.1:9090/models/available")
|
models, err = getModels("http://127.0.0.1:9090/models/available")
|
||||||
|
Expect(err).To(BeNil())
|
||||||
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
Expect(len(models)).To(Equal(2), fmt.Sprint(models))
|
||||||
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
|
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
|
||||||
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
|
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
|
||||||
|
@ -17,12 +17,14 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
loadedModels := ml.ListModels()
|
||||||
for b := range appConfig.ExternalGRPCBackends {
|
for b := range appConfig.ExternalGRPCBackends {
|
||||||
availableBackends = append(availableBackends, b)
|
availableBackends = append(availableBackends, b)
|
||||||
}
|
}
|
||||||
return c.JSON(
|
return c.JSON(
|
||||||
schema.SystemInformationResponse{
|
schema.SystemInformationResponse{
|
||||||
Backends: availableBackends,
|
Backends: availableBackends,
|
||||||
|
Models: loadedModels,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
textContentToReturn = ""
|
textContentToReturn = ""
|
||||||
id = uuid.New().String()
|
id = uuid.New().String()
|
||||||
created = int(time.Now().Unix())
|
created = int(time.Now().Unix())
|
||||||
|
// Set CorrelationID
|
||||||
|
correlationID := c.Get("X-Correlation-ID")
|
||||||
|
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||||
|
correlationID = id
|
||||||
|
}
|
||||||
|
c.Set("X-Correlation-ID", correlationID)
|
||||||
|
|
||||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
c.Set("Cache-Control", "no-cache")
|
c.Set("Cache-Control", "no-cache")
|
||||||
c.Set("Connection", "keep-alive")
|
c.Set("Connection", "keep-alive")
|
||||||
c.Set("Transfer-Encoding", "chunked")
|
c.Set("Transfer-Encoding", "chunked")
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
|
|
||||||
responses := make(chan schema.OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
@ -640,8 +647,16 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
|
|||||||
for _, m := range input.Messages {
|
for _, m := range input.Messages {
|
||||||
images = append(images, m.StringImages...)
|
images = append(images, m.StringImages...)
|
||||||
}
|
}
|
||||||
|
videos := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
videos = append(videos, m.StringVideos...)
|
||||||
|
}
|
||||||
|
audios := []string{}
|
||||||
|
for _, m := range input.Messages {
|
||||||
|
audios = append(audios, m.StringAudios...)
|
||||||
|
}
|
||||||
|
|
||||||
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
|
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Msg("model inference failed")
|
log.Error().Err(err).Msg("model inference failed")
|
||||||
return "", err
|
return "", err
|
||||||
|
@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
// Add Correlation
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
@ -27,9 +27,17 @@ func ComputeChoices(
|
|||||||
for _, m := range req.Messages {
|
for _, m := range req.Messages {
|
||||||
images = append(images, m.StringImages...)
|
images = append(images, m.StringImages...)
|
||||||
}
|
}
|
||||||
|
videos := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
videos = append(videos, m.StringVideos...)
|
||||||
|
}
|
||||||
|
audios := []string{}
|
||||||
|
for _, m := range req.Messages {
|
||||||
|
audios = append(audios, m.StringAudios...)
|
||||||
|
}
|
||||||
|
|
||||||
// get the model function to call for the result
|
// get the model function to call for the result
|
||||||
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
|
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, backend.TokenUsage{}, err
|
return result, backend.TokenUsage{}, err
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
@ -15,6 +16,11 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type correlationIDKeyType string
|
||||||
|
|
||||||
|
// CorrelationIDKey to track request across process boundary
|
||||||
|
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||||
|
|
||||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||||
input := new(schema.OpenAIRequest)
|
input := new(schema.OpenAIRequest)
|
||||||
|
|
||||||
@ -24,9 +30,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
|
|||||||
}
|
}
|
||||||
|
|
||||||
received, _ := json.Marshal(input)
|
received, _ := json.Marshal(input)
|
||||||
|
// Extract or generate the correlation ID
|
||||||
|
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(o.Context)
|
ctx, cancel := context.WithCancel(o.Context)
|
||||||
input.Context = ctx
|
// Add the correlation ID to the new context
|
||||||
|
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||||
|
|
||||||
|
input.Context = ctxWithCorrelationID
|
||||||
input.Cancel = cancel
|
input.Cancel = cancel
|
||||||
|
|
||||||
log.Debug().Msgf("Request received: %s", string(received))
|
log.Debug().Msgf("Request received: %s", string(received))
|
||||||
@ -135,7 +146,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Decode each request's message content
|
// Decode each request's message content
|
||||||
index := 0
|
imgIndex, vidIndex, audioIndex := 0, 0, 0
|
||||||
for i, m := range input.Messages {
|
for i, m := range input.Messages {
|
||||||
switch content := m.Content.(type) {
|
switch content := m.Content.(type) {
|
||||||
case string:
|
case string:
|
||||||
@ -144,20 +155,44 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
dat, _ := json.Marshal(content)
|
dat, _ := json.Marshal(content)
|
||||||
c := []schema.Content{}
|
c := []schema.Content{}
|
||||||
json.Unmarshal(dat, &c)
|
json.Unmarshal(dat, &c)
|
||||||
|
CONTENT:
|
||||||
for _, pp := range c {
|
for _, pp := range c {
|
||||||
if pp.Type == "text" {
|
switch pp.Type {
|
||||||
|
case "text":
|
||||||
input.Messages[i].StringContent = pp.Text
|
input.Messages[i].StringContent = pp.Text
|
||||||
} else if pp.Type == "image_url" {
|
case "video", "video_url":
|
||||||
// Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
|
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL)
|
||||||
if err == nil {
|
if err != nil {
|
||||||
|
log.Error().Msgf("Failed encoding video: %s", err)
|
||||||
|
continue CONTENT
|
||||||
|
}
|
||||||
|
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
input.Messages[i].StringContent = fmt.Sprintf("[vid-%d]", vidIndex) + input.Messages[i].StringContent
|
||||||
|
vidIndex++
|
||||||
|
case "audio_url", "audio":
|
||||||
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
|
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("Failed encoding image: %s", err)
|
||||||
|
continue CONTENT
|
||||||
|
}
|
||||||
|
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
// set a placeholder for each image
|
||||||
|
input.Messages[i].StringContent = fmt.Sprintf("[audio-%d]", audioIndex) + input.Messages[i].StringContent
|
||||||
|
audioIndex++
|
||||||
|
case "image_url", "image":
|
||||||
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
|
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msgf("Failed encoding image: %s", err)
|
||||||
|
continue CONTENT
|
||||||
|
}
|
||||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||||
// set a placeholder for each image
|
// set a placeholder for each image
|
||||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
|
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", imgIndex) + input.Messages[i].StringContent
|
||||||
index++
|
imgIndex++
|
||||||
} else {
|
|
||||||
log.Error().Msgf("Failed encoding image: %s", err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
94
core/http/middleware/auth.go
Normal file
94
core/http/middleware/auth.go
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
package middleware
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/subtle"
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/dave-gray101/v2keyauth"
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/gofiber/fiber/v2/middleware/keyauth"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
|
||||||
|
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
|
||||||
|
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
|
||||||
|
|
||||||
|
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
|
||||||
|
customLookup, err := v2keyauth.MultipleKeySourceLookup([]string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}, keyauth.ConfigDefault.AuthScheme)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &v2keyauth.Config{
|
||||||
|
CustomKeyLookup: customLookup,
|
||||||
|
Next: getApiKeyRequiredFilterFunction(applicationConfig),
|
||||||
|
Validator: getApiKeyValidationFunction(applicationConfig),
|
||||||
|
ErrorHandler: getApiKeyErrorHandler(applicationConfig),
|
||||||
|
AuthScheme: "Bearer",
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
|
||||||
|
return func(ctx *fiber.Ctx, err error) error {
|
||||||
|
if errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return ctx.Next() // if no keys are set up, any error we get here is not an error.
|
||||||
|
}
|
||||||
|
if applicationConfig.OpaqueErrors {
|
||||||
|
return ctx.SendStatus(403)
|
||||||
|
}
|
||||||
|
return ctx.Status(403).SendString(err.Error())
|
||||||
|
}
|
||||||
|
if applicationConfig.OpaqueErrors {
|
||||||
|
return ctx.SendStatus(500)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
|
||||||
|
|
||||||
|
if applicationConfig.UseSubtleKeyComparison {
|
||||||
|
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return true, nil // If no keys are setup, accept everything
|
||||||
|
}
|
||||||
|
for _, validKey := range applicationConfig.ApiKeys {
|
||||||
|
if subtle.ConstantTimeCompare([]byte(apiKey), []byte(validKey)) == 1 {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return func(ctx *fiber.Ctx, apiKey string) (bool, error) {
|
||||||
|
if len(applicationConfig.ApiKeys) == 0 {
|
||||||
|
return true, nil // If no keys are setup, accept everything
|
||||||
|
}
|
||||||
|
for _, validKey := range applicationConfig.ApiKeys {
|
||||||
|
if apiKey == validKey {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, v2keyauth.ErrMissingOrMalformedAPIKey
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
|
||||||
|
if applicationConfig.DisableApiKeyRequirementForHttpGet {
|
||||||
|
return func(c *fiber.Ctx) bool {
|
||||||
|
if c.Method() != "GET" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, rx := range applicationConfig.HttpGetExemptedEndpoints {
|
||||||
|
if rx.MatchString(c.Path()) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return func(c *fiber.Ctx) bool { return false }
|
||||||
|
}
|
@ -10,12 +10,11 @@ import (
|
|||||||
func RegisterElevenLabsRoutes(app *fiber.App,
|
func RegisterElevenLabsRoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// Elevenlabs
|
// Elevenlabs
|
||||||
app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
13
core/http/routes/health.go
Normal file
13
core/http/routes/health.go
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
package routes
|
||||||
|
|
||||||
|
import "github.com/gofiber/fiber/v2"
|
||||||
|
|
||||||
|
func HealthRoutes(app *fiber.App) {
|
||||||
|
// Service health checks
|
||||||
|
ok := func(c *fiber.Ctx) error {
|
||||||
|
return c.SendStatus(200)
|
||||||
|
}
|
||||||
|
|
||||||
|
app.Get("/healthz", ok)
|
||||||
|
app.Get("/readyz", ok)
|
||||||
|
}
|
@ -11,8 +11,7 @@ import (
|
|||||||
func RegisterJINARoutes(app *fiber.App,
|
func RegisterJINARoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// POST endpoint to mimic the reranking
|
// POST endpoint to mimic the reranking
|
||||||
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))
|
||||||
|
@ -15,61 +15,52 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
|||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
galleryService *services.GalleryService,
|
galleryService *services.GalleryService) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
app.Get("/swagger/*", swagger.HandlerDefault) // default
|
||||||
|
|
||||||
// LocalAI API endpoints
|
// LocalAI API endpoints
|
||||||
if !appConfig.DisableGalleryEndpoint {
|
if !appConfig.DisableGalleryEndpoint {
|
||||||
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
|
||||||
app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
|
||||||
app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
|
||||||
|
|
||||||
app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
|
||||||
app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
|
||||||
app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
|
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint())
|
||||||
app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint())
|
||||||
app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
|
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
|
||||||
app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
|
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// Stores
|
// Stores
|
||||||
sl := model.NewModelLoader("")
|
sl := model.NewModelLoader("")
|
||||||
app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
|
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
|
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
|
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig))
|
||||||
app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
|
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig))
|
||||||
|
|
||||||
// Kubernetes health checks
|
app.Get("/metrics", localai.LocalAIMetricsEndpoint())
|
||||||
ok := func(c *fiber.Ctx) error {
|
|
||||||
return c.SendStatus(200)
|
|
||||||
}
|
|
||||||
|
|
||||||
app.Get("/healthz", ok)
|
|
||||||
app.Get("/readyz", ok)
|
|
||||||
|
|
||||||
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
|
|
||||||
|
|
||||||
// Experimental Backend Statistics Module
|
// Experimental Backend Statistics Module
|
||||||
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
|
||||||
app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
|
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
|
||||||
app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
|
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
|
||||||
|
|
||||||
// p2p
|
// p2p
|
||||||
if p2p.IsP2PEnabled() {
|
if p2p.IsP2PEnabled() {
|
||||||
app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig))
|
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
|
||||||
app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
|
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Get("/version", auth, func(c *fiber.Ctx) error {
|
app.Get("/version", func(c *fiber.Ctx) error {
|
||||||
return c.JSON(struct {
|
return c.JSON(struct {
|
||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
}{Version: internal.PrintableVersion()})
|
}{Version: internal.PrintableVersion()})
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
|
app.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -11,66 +11,65 @@ import (
|
|||||||
func RegisterOpenAIRoutes(app *fiber.App,
|
func RegisterOpenAIRoutes(app *fiber.App,
|
||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
// openAI compatible API endpoint
|
// openAI compatible API endpoint
|
||||||
|
|
||||||
// chat
|
// chat
|
||||||
app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
|
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// edit
|
// edit
|
||||||
app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
|
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// assistant
|
// assistant
|
||||||
app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// files
|
// files
|
||||||
app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||||
app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
|
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
|
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
|
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig))
|
||||||
app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||||
app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
|
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig))
|
||||||
app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||||
app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
|
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig))
|
||||||
|
|
||||||
// completion
|
// completion
|
||||||
app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
|
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// embeddings
|
// embeddings
|
||||||
app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// audio
|
// audio
|
||||||
app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
|
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig))
|
||||||
app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
|
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
// images
|
// images
|
||||||
app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
|
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
if appConfig.ImageDir != "" {
|
if appConfig.ImageDir != "" {
|
||||||
app.Static("/generated-images", appConfig.ImageDir)
|
app.Static("/generated-images", appConfig.ImageDir)
|
||||||
@ -81,6 +80,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List models
|
// List models
|
||||||
app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
|
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml))
|
||||||
app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
|
app.Get("/models", openai.ListModelsEndpoint(cl, ml))
|
||||||
}
|
}
|
||||||
|
@ -59,8 +59,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
cl *config.BackendConfigLoader,
|
cl *config.BackendConfigLoader,
|
||||||
ml *model.ModelLoader,
|
ml *model.ModelLoader,
|
||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
galleryService *services.GalleryService,
|
galleryService *services.GalleryService) {
|
||||||
auth func(*fiber.Ctx) error) {
|
|
||||||
|
|
||||||
// keeps the state of models that are being installed from the UI
|
// keeps the state of models that are being installed from the UI
|
||||||
var processingModels = NewModelOpCache()
|
var processingModels = NewModelOpCache()
|
||||||
@ -85,10 +84,10 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return processingModelsData, taskTypes
|
return processingModelsData, taskTypes
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
|
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
|
||||||
|
|
||||||
if p2p.IsP2PEnabled() {
|
if p2p.IsP2PEnabled() {
|
||||||
app.Get("/p2p", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p", func(c *fiber.Ctx) error {
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - P2P dashboard",
|
"Title": "LocalAI - P2P dashboard",
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
@ -104,17 +103,17 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
})
|
})
|
||||||
|
|
||||||
/* show nodes live! */
|
/* show nodes live! */
|
||||||
app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
||||||
})
|
})
|
||||||
app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
|
||||||
})
|
})
|
||||||
app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
|
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error {
|
||||||
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -122,7 +121,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
if !appConfig.DisableGalleryEndpoint {
|
if !appConfig.DisableGalleryEndpoint {
|
||||||
|
|
||||||
// Show the Models page (all models)
|
// Show the Models page (all models)
|
||||||
app.Get("/browse", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse", func(c *fiber.Ctx) error {
|
||||||
term := c.Query("term")
|
term := c.Query("term")
|
||||||
|
|
||||||
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
|
||||||
@ -167,7 +166,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// Show the models, filtered from the user input
|
// Show the models, filtered from the user input
|
||||||
// https://htmx.org/examples/active-search/
|
// https://htmx.org/examples/active-search/
|
||||||
app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/search/models", func(c *fiber.Ctx) error {
|
||||||
form := struct {
|
form := struct {
|
||||||
Search string `form:"search"`
|
Search string `form:"search"`
|
||||||
}{}
|
}{}
|
||||||
@ -188,7 +187,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error {
|
||||||
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
||||||
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
|
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
|
||||||
|
|
||||||
@ -215,7 +214,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
|
app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error {
|
||||||
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
|
||||||
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
|
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
|
||||||
var galleryName = galleryID
|
var galleryName = galleryID
|
||||||
@ -255,7 +254,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
// Display the job current progress status
|
// Display the job current progress status
|
||||||
// If the job is done, we trigger the /browse/job/:uid route
|
// If the job is done, we trigger the /browse/job/:uid route
|
||||||
// https://htmx.org/examples/progress-bar/
|
// https://htmx.org/examples/progress-bar/
|
||||||
app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error {
|
||||||
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
||||||
|
|
||||||
status := galleryService.GetStatus(jobUID)
|
status := galleryService.GetStatus(jobUID)
|
||||||
@ -279,7 +278,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// this route is hit when the job is done, and we display the
|
// this route is hit when the job is done, and we display the
|
||||||
// final state (for now just displays "Installation completed")
|
// final state (for now just displays "Installation completed")
|
||||||
app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
|
app.Get("/browse/job/:uid", func(c *fiber.Ctx) error {
|
||||||
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
|
||||||
|
|
||||||
status := galleryService.GetStatus(jobUID)
|
status := galleryService.GetStatus(jobUID)
|
||||||
@ -303,7 +302,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Show the Chat page
|
// Show the Chat page
|
||||||
app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
@ -318,7 +317,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/chat", summary)
|
return c.Render("views/chat", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/talk/", auth, func(c *fiber.Ctx) error {
|
app.Get("/talk/", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
@ -338,7 +337,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/talk", summary)
|
return c.Render("views/talk", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/chat/", auth, func(c *fiber.Ctx) error {
|
app.Get("/chat/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
||||||
|
|
||||||
@ -359,7 +358,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/chat", summary)
|
return c.Render("views/chat", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/text2image/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
@ -374,7 +373,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/text2image", summary)
|
return c.Render("views/text2image", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/text2image/", auth, func(c *fiber.Ctx) error {
|
app.Get("/text2image/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
@ -395,7 +394,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/text2image", summary)
|
return c.Render("views/text2image", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
|
app.Get("/tts/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
@ -410,7 +409,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
return c.Render("views/tts", summary)
|
return c.Render("views/tts", summary)
|
||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/tts/", auth, func(c *fiber.Ctx) error {
|
app.Get("/tts/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ package schema
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -73,4 +74,5 @@ type P2PNodesResponse struct {
|
|||||||
|
|
||||||
type SystemInformationResponse struct {
|
type SystemInformationResponse struct {
|
||||||
Backends []string `json:"backends"`
|
Backends []string `json:"backends"`
|
||||||
|
Models []model.Model `json:"loaded_models"`
|
||||||
}
|
}
|
||||||
|
@ -58,6 +58,8 @@ type Content struct {
|
|||||||
Type string `json:"type" yaml:"type"`
|
Type string `json:"type" yaml:"type"`
|
||||||
Text string `json:"text" yaml:"text"`
|
Text string `json:"text" yaml:"text"`
|
||||||
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
ImageURL ContentURL `json:"image_url" yaml:"image_url"`
|
||||||
|
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
|
||||||
|
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ContentURL struct {
|
type ContentURL struct {
|
||||||
@ -76,6 +78,8 @@ type Message struct {
|
|||||||
|
|
||||||
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
|
||||||
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
|
||||||
|
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
|
||||||
|
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
|
||||||
|
|
||||||
// A result of a function call
|
// A result of a function call
|
||||||
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
|
||||||
|
@ -5,6 +5,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core"
|
"github.com/mudler/LocalAI/core"
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/core/services"
|
"github.com/mudler/LocalAI/core/services"
|
||||||
"github.com/mudler/LocalAI/internal"
|
"github.com/mudler/LocalAI/internal"
|
||||||
@ -144,6 +145,42 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if options.LoadToMemory != nil {
|
||||||
|
for _, m := range options.LoadToMemory {
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath,
|
||||||
|
config.LoadOptionDebug(options.Debug),
|
||||||
|
config.LoadOptionThreads(options.Threads),
|
||||||
|
config.LoadOptionContextSize(options.ContextSize),
|
||||||
|
config.LoadOptionF16(options.F16),
|
||||||
|
config.ModelPath(options.ModelPath),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
|
||||||
|
|
||||||
|
grpcOpts := backend.GRPCModelOpts(*cfg)
|
||||||
|
o := []model.Option{
|
||||||
|
model.WithModel(cfg.Model),
|
||||||
|
model.WithAssetDir(options.AssetsDestination),
|
||||||
|
model.WithThreads(uint32(options.Threads)),
|
||||||
|
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
||||||
|
}
|
||||||
|
|
||||||
|
var backendErr error
|
||||||
|
if cfg.Backend != "" {
|
||||||
|
o = append(o, model.WithBackendString(cfg.Backend))
|
||||||
|
_, backendErr = ml.BackendLoader(o...)
|
||||||
|
} else {
|
||||||
|
_, backendErr = ml.GreedyLoader(o...)
|
||||||
|
}
|
||||||
|
if backendErr != nil {
|
||||||
|
return nil, nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Watch the configuration directory
|
// Watch the configuration directory
|
||||||
startWatcher(options)
|
startWatcher(options)
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ Images are available with and without python dependencies. Note that images with
|
|||||||
|
|
||||||
Images with `core` in the tag are smaller and do not contain any python dependencies.
|
Images with `core` in the tag are smaller and do not contain any python dependencies.
|
||||||
|
|
||||||
{{< tabs tabTotal="6" >}}
|
{{< tabs tabTotal="7" >}}
|
||||||
{{% tab tabName="Vanilla / CPU Images" %}}
|
{{% tab tabName="Vanilla / CPU Images" %}}
|
||||||
|
|
||||||
| Description | Quay | Docker Hub |
|
| Description | Quay | Docker Hub |
|
||||||
@ -227,6 +227,15 @@ Images with `core` in the tag are smaller and do not contain any python dependen
|
|||||||
|
|
||||||
{{% /tab %}}
|
{{% /tab %}}
|
||||||
|
|
||||||
|
|
||||||
|
{{% tab tabName="Vulkan Images" %}}
|
||||||
|
| Description | Quay | Docker Hub |
|
||||||
|
| --- | --- |-------------------------------------------------------------|
|
||||||
|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-vulkan-ffmpeg-core` | `localai/localai:master-vulkan-ffmpeg-core` |
|
||||||
|
| Latest tag | `quay.io/go-skynet/local-ai:latest-vulkan-ffmpeg-core` | `localai/localai:latest-vulkan-ffmpeg-core` |
|
||||||
|
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan-ffmpeg-core` | `localai/localai:{{< version >}}-vulkan-ffmpeg-core` |
|
||||||
|
{{% /tab %}}
|
||||||
|
|
||||||
{{< /tabs >}}
|
{{< /tabs >}}
|
||||||
|
|
||||||
## See Also
|
## See Also
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v2.20.1"
|
"version": "v2.21.1"
|
||||||
}
|
}
|
||||||
|
2
docs/themes/hugo-theme-relearn
vendored
2
docs/themes/hugo-theme-relearn
vendored
@ -1 +1 @@
|
|||||||
Subproject commit f696f60f4e44e18a34512b895a7b65a72c801bd8
|
Subproject commit d5a0ee04ad986394d6d2f1e1a57f2334d24bf317
|
@ -39,7 +39,7 @@ func init() {
|
|||||||
func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
|
func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
|
||||||
remoteLibrary := map[string]string{}
|
remoteLibrary := map[string]string{}
|
||||||
uri := downloader.URI(url)
|
uri := downloader.URI(url)
|
||||||
err := uri.DownloadAndUnmarshal(basePath, func(_ string, i []byte) error {
|
err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error {
|
||||||
return yaml.Unmarshal(i, &remoteLibrary)
|
return yaml.Unmarshal(i, &remoteLibrary)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
llama_index==0.11.7
|
llama_index==0.11.14
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
weaviate_client==4.6.7
|
weaviate_client==4.8.1
|
||||||
transformers
|
transformers
|
||||||
torch
|
torch
|
||||||
chainlit
|
chainlit
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
langchain==0.2.16
|
langchain==0.3.1
|
||||||
openai==1.44.0
|
openai==1.50.2
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
langchain==0.2.16
|
langchain==0.3.1
|
||||||
openai==1.44.1
|
openai==1.50.2
|
||||||
chromadb==0.5.5
|
chromadb==0.5.11
|
||||||
llama-index==0.11.7
|
llama-index==0.11.14
|
@ -1,4 +1,4 @@
|
|||||||
aiohttp==3.10.3
|
aiohttp==3.10.8
|
||||||
aiosignal==1.3.1
|
aiosignal==1.3.1
|
||||||
async-timeout==4.0.3
|
async-timeout==4.0.3
|
||||||
attrs==24.2.0
|
attrs==24.2.0
|
||||||
@ -8,26 +8,26 @@ colorama==0.4.6
|
|||||||
dataclasses-json==0.6.7
|
dataclasses-json==0.6.7
|
||||||
debugpy==1.8.2
|
debugpy==1.8.2
|
||||||
frozenlist==1.4.1
|
frozenlist==1.4.1
|
||||||
greenlet==3.0.3
|
greenlet==3.1.1
|
||||||
idna==3.8
|
idna==3.10
|
||||||
langchain==0.2.16
|
langchain==0.3.1
|
||||||
langchain-community==0.2.16
|
langchain-community==0.3.1
|
||||||
marshmallow==3.22.0
|
marshmallow==3.22.0
|
||||||
marshmallow-enum==1.5.1
|
marshmallow-enum==1.5.1
|
||||||
multidict==6.0.5
|
multidict==6.0.5
|
||||||
mypy-extensions==1.0.0
|
mypy-extensions==1.0.0
|
||||||
numexpr==2.10.1
|
numexpr==2.10.1
|
||||||
numpy==2.1.1
|
numpy==2.1.1
|
||||||
openai==1.44.0
|
openai==1.45.1
|
||||||
openapi-schema-pydantic==1.2.4
|
openapi-schema-pydantic==1.2.4
|
||||||
packaging>=23.2
|
packaging>=23.2
|
||||||
pydantic==2.8.2
|
pydantic==2.9.2
|
||||||
PyYAML==6.0.2
|
PyYAML==6.0.2
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
SQLAlchemy==2.0.32
|
SQLAlchemy==2.0.35
|
||||||
tenacity==8.5.0
|
tenacity==8.5.0
|
||||||
tqdm==4.66.5
|
tqdm==4.66.5
|
||||||
typing-inspect==0.9.0
|
typing-inspect==0.9.0
|
||||||
typing_extensions==4.12.2
|
typing_extensions==4.12.2
|
||||||
urllib3==2.2.2
|
urllib3==2.2.3
|
||||||
yarl==1.11.0
|
yarl==1.13.1
|
||||||
|
@ -1,6 +1,302 @@
|
|||||||
---
|
---
|
||||||
## SmolLM
|
## llama3.2
|
||||||
|
- &llama32
|
||||||
|
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||||
|
license: llama3.2
|
||||||
|
description: |
|
||||||
|
The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
|
||||||
|
|
||||||
|
Model Developer: Meta
|
||||||
|
|
||||||
|
Model Architecture: Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
- gpu
|
||||||
|
- cpu
|
||||||
|
- llama3.2
|
||||||
|
name: "llama-3.2-1b-instruct:q4_k_m"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
sha256: 1d0e9419ec4e12aef73ccf4ffd122703e94c48344a96bc7c5f0f2772c2152ce3
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-3b-instruct:q4_k_m"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
sha256: c55a83bfb6396799337853ca69918a0b9bbb2917621078c34570bc17d20fd7a1
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF/llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-3b-instruct:q8_0"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
sha256: 51725f77f997a5080c3d8dd66e073da22ddf48ab5264f21f05ded9b202c3680e
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF/llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-1b-instruct:q8_0"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-1b-instruct-q8_0.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-1b-instruct-q8_0.gguf
|
||||||
|
sha256: ba345c83bf5cc679c653b853c46517eea5a34f03ed2205449db77184d9ae62a9
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF/llama-3.2-1b-instruct-q8_0.gguf
|
||||||
|
- &qwen25
|
||||||
|
## Qwen2.5
|
||||||
|
name: "qwen2.5-14b-instruct"
|
||||||
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
|
license: apache-2.0
|
||||||
|
description: |
|
||||||
|
Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
- gpu
|
||||||
|
- qwen
|
||||||
|
- qwen2.5
|
||||||
|
- cpu
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-14B-Instruct
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-14B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-14B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: e47ad95dad6ff848b431053b375adb5d39321290ea2c638682577dafca87c008
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-14B-Instruct-GGUF/Qwen2.5-14B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-math-7b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-Math-7B-Instruct-GGUF
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct
|
||||||
|
description: |
|
||||||
|
In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B.
|
||||||
|
|
||||||
|
Unlike Qwen2-Math series which only supports using Chain-of-Thought (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT.
|
||||||
|
|
||||||
|
The base models of Qwen2-Math are initialized with Qwen2-1.5B/7B/72B, and then pretrained on a meticulously designed Mathematics-specific Corpus. This corpus contains large-scale high-quality mathematical web texts, books, codes, exam questions, and mathematical pre-training data synthesized by Qwen2.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 7e03cee8c65b9ebf9ca14ddb010aca27b6b18e6c70f2779e94e7451d9529c091
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-Math-7B-Instruct-GGUF/Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-14b_uncencored"
|
||||||
|
icon: https://huggingface.co/SicariusSicariiStuff/Phi-3.5-mini-instruct_Uncensored/resolve/main/Misc/Uncensored.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/SicariusSicariiStuff/Qwen2.5-14B_Uncencored
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-14B_Uncencored-GGUF
|
||||||
|
description: |
|
||||||
|
Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.
|
||||||
|
|
||||||
|
Uncensored qwen2.5
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
- gpu
|
||||||
|
- qwen
|
||||||
|
- qwen2.5
|
||||||
|
- cpu
|
||||||
|
- uncensored
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-14B_Uncencored-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-14B_Uncencored-Q4_K_M.gguf
|
||||||
|
sha256: 066b9341b67e0fd0956de3576a3b7988574a5b9a0028aef2b9c8edeadd6dbbd1
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-14B_Uncencored-GGUF/Qwen2.5-14B_Uncencored-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-coder-7b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF
|
||||||
|
description: |
|
||||||
|
Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). For Qwen2.5-Coder, we release three base language models and instruction-tuned language models, 1.5, 7 and 32 (coming soon) billion parameters. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:
|
||||||
|
|
||||||
|
Significant improvements in code generation, code reasoning and code fixing. Based on the strong Qwen2.5, we scale up the training tokens to 5.5 trillion, including source code, text-code grounding, synthetic data, etc.
|
||||||
|
A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.
|
||||||
|
Long-context Support up to 128K tokens.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 1664fccab734674a50763490a8c6931b70e3f2f8ec10031b54806d30e5f956b6
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-math-72b-instruct"
|
||||||
|
icon: http://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2.5/qwen2.5-math-pipeline.jpeg
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-Math-72B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-Math-72B-Instruct-GGUF
|
||||||
|
description: |
|
||||||
|
In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B.
|
||||||
|
|
||||||
|
Unlike Qwen2-Math series which only supports using Chain-of-Thought (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 5dee8a6e21d555577712b4f65565a3c3737a0d5d92f5a82970728c6d8e237f17
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-Math-72B-Instruct-GGUF/Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-0.5b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 6eb923e7d26e9cea28811e1a8e852009b21242fb157b26149d3b188f3a8c8653
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-1.5b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 1adf0b11065d8ad2e8123ea110d1ec956dab4ab038eab665614adba04b6c3370
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-1.5B-Instruct-GGUF/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-32b"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-32B
|
||||||
|
- https://huggingface.co/mradermacher/Qwen2.5-32B-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-32B.Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-32B.Q4_K_M.gguf
|
||||||
|
uri: huggingface://mradermacher/Qwen2.5-32B-GGUF/Qwen2.5-32B.Q4_K_M.gguf
|
||||||
|
sha256: fa42a4067e3630929202b6bb1ef5cebc43c1898494aedfd567b7d53c7a9d84a6
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-32b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-32B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-32B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-32B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 2e5f6daea180dbc59f65a40641e94d3973b5dbaa32b3c0acf54647fa874e519e
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-32B-Instruct-GGUF/Qwen2.5-32B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "qwen2.5-72b-instruct"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Qwen/Qwen2.5-72B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: e4c8fad16946be8cf0bbf67eb8f4e18fc7415a5a6d2854b4cda453edb4082545
|
||||||
|
uri: huggingface://bartowski/Qwen2.5-72B-Instruct-GGUF/Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "bigqwen2.5-52b-instruct"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/98GiKtmH1AtHHbIbOUH4Y.jpeg
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/mlabonne/BigQwen2.5-52B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/BigQwen2.5-52B-Instruct-GGUF
|
||||||
|
description: |
|
||||||
|
BigQwen2.5-52B-Instruct is a Qwen/Qwen2.5-32B-Instruct self-merge made with MergeKit.
|
||||||
|
It applies the mlabonne/Meta-Llama-3-120B-Instruct recipe.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 9c939f08e366b51b07096eb2ecb5cc2a82894ac7baf639e446237ad39889c896
|
||||||
|
uri: huggingface://bartowski/BigQwen2.5-52B-Instruct-GGUF/BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "replete-llm-v2.5-qwen-14b"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-14b
|
||||||
|
- https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF
|
||||||
|
description: |
|
||||||
|
Replete-LLM-V2.5-Qwen-14b is a continuously finetuned version of Qwen2.5-14B. I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method.
|
||||||
|
|
||||||
|
This version of the model shows higher performance than the original instruct and base models.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
sha256: 17d0792ff5e3062aecb965629f66e679ceb407e4542e8045993dcfe9e7e14d9d
|
||||||
|
uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF/Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "replete-llm-v2.5-qwen-7b"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-7b
|
||||||
|
- https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF
|
||||||
|
description: |
|
||||||
|
Replete-LLM-V2.5-Qwen-7b is a continuously finetuned version of Qwen2.5-7B. I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method.
|
||||||
|
|
||||||
|
This version of the model shows higher performance than the original instruct and base models.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
sha256: 054d54972259c0398b4e0af3f408f608e1166837b1d7535d08fc440d1daf8639
|
||||||
|
uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF/Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "calme-2.2-qwen2.5-72b-i1"
|
||||||
|
icon: https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b/resolve/main/calme-2.webp
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b
|
||||||
|
- https://huggingface.co/mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF
|
||||||
|
description: |
|
||||||
|
This model is a fine-tuned version of the powerful Qwen/Qwen2.5-72B-Instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications.
|
||||||
|
Use Cases
|
||||||
|
|
||||||
|
This model is suitable for a wide range of applications, including but not limited to:
|
||||||
|
|
||||||
|
Advanced question-answering systems
|
||||||
|
Intelligent chatbots and virtual assistants
|
||||||
|
Content generation and summarization
|
||||||
|
Code generation and analysis
|
||||||
|
Complex problem-solving and decision support
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
|
sha256: 5fdfa599724d7c78502c477ced1d294e92781b91d3265bd0748fbf15a6fefde6
|
||||||
|
uri: huggingface://mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF/calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
- &smollm
|
- &smollm
|
||||||
|
## SmolLM
|
||||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
name: "smollm-1.7b-instruct"
|
name: "smollm-1.7b-instruct"
|
||||||
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
||||||
@ -439,6 +735,75 @@
|
|||||||
- filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
|
- filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
|
||||||
sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
|
sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
|
||||||
uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
|
uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
|
||||||
|
- !!merge <<: *llama31
|
||||||
|
name: "llama-3.1-supernova-lite-reflection-v1.0-i1"
|
||||||
|
url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master"
|
||||||
|
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0
|
||||||
|
- https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF
|
||||||
|
description: |
|
||||||
|
This model is a LoRA adaptation of arcee-ai/Llama-3.1-SuperNova-Lite on thesven/Reflective-MAGLLAMA-v0.1.1. This has been a simple experiment into reflection and the model appears to perform adequately, though I am unsure if it is a large improvement.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
|
||||||
|
sha256: 0c4531fe553d00142808e1bc7348ae92d400794c5b64d2db1a974718324dfe9a
|
||||||
|
uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *llama31
|
||||||
|
name: "llama-3.1-supernova-lite"
|
||||||
|
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite
|
||||||
|
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF
|
||||||
|
description: |
|
||||||
|
Llama-3.1-SuperNova-Lite is an 8B parameter model developed by Arcee.ai, based on the Llama-3.1-8B-Instruct architecture. It is a distilled version of the larger Llama-3.1-405B-Instruct model, leveraging offline logits extracted from the 405B parameter variant. This 8B variation of Llama-3.1-SuperNova maintains high performance while offering exceptional instruction-following capabilities and domain-specific adaptability.
|
||||||
|
|
||||||
|
The model was trained using a state-of-the-art distillation pipeline and an instruction dataset generated with EvolKit, ensuring accuracy and efficiency across a wide range of tasks. For more information on its training, visit blog.arcee.ai.
|
||||||
|
|
||||||
|
Llama-3.1-SuperNova-Lite excels in both benchmark performance and real-world applications, providing the power of large-scale models in a more compact, efficient form ideal for organizations seeking high performance with reduced resource requirements.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: supernova-lite-v1.Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: supernova-lite-v1.Q4_K_M.gguf
|
||||||
|
sha256: 237b7b0b704d294f92f36c576cc8fdc10592f95168a5ad0f075a2d8edf20da4d
|
||||||
|
uri: huggingface://arcee-ai/Llama-3.1-SuperNova-Lite-GGUF/supernova-lite-v1.Q4_K_M.gguf
|
||||||
|
- !!merge <<: *llama31
|
||||||
|
name: "llama3.1-8b-shiningvaliant2"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/EXX7TKbB-R6arxww2mk0R.jpeg
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/ValiantLabs/Llama3.1-8B-ShiningValiant2
|
||||||
|
- https://huggingface.co/bartowski/Llama3.1-8B-ShiningValiant2-GGUF
|
||||||
|
description: |
|
||||||
|
Shining Valiant 2 is a chat model built on Llama 3.1 8b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
|
||||||
|
|
||||||
|
Finetuned on meta-llama/Meta-Llama-3.1-8B-Instruct for best available general performance
|
||||||
|
Trained on a variety of high quality data; focused on science, engineering, technical knowledge, and structured reasoning
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
|
||||||
|
sha256: 9369eb97922a9f01e4eae610e3d7aaeca30762d78d9239884179451d60bdbdd2
|
||||||
|
uri: huggingface://bartowski/Llama3.1-8B-ShiningValiant2-GGUF/Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *llama31
|
||||||
|
name: "nightygurps-14b-v1.1"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/6336c5b3e3ac69e6a90581da/FvfjK7bKqsWdaBkB3eWgP.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/AlexBefest/NightyGurps-14b-v1.1
|
||||||
|
- https://huggingface.co/bartowski/NightyGurps-14b-v1.1-GGUF
|
||||||
|
description: |
|
||||||
|
This model works with Russian only.
|
||||||
|
This model is designed to run GURPS roleplaying games, as well as consult and assist. This model was trained on an augmented dataset of the GURPS Basic Set rulebook. Its primary purpose was initially to become an assistant consultant and assistant Game Master for the GURPS roleplaying system, but it can also be used as a GM for running solo games as a player.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: NightyGurps-14b-v1.1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: NightyGurps-14b-v1.1-Q4_K_M.gguf
|
||||||
|
sha256: d09d53259ad2c0298150fa8c2db98fe42f11731af89fdc80ad0e255a19adc4b0
|
||||||
|
uri: huggingface://bartowski/NightyGurps-14b-v1.1-GGUF/NightyGurps-14b-v1.1-Q4_K_M.gguf
|
||||||
## Uncensored models
|
## Uncensored models
|
||||||
- !!merge <<: *llama31
|
- !!merge <<: *llama31
|
||||||
name: "humanish-roleplay-llama-3.1-8b-i1"
|
name: "humanish-roleplay-llama-3.1-8b-i1"
|
||||||
@ -741,6 +1106,20 @@
|
|||||||
- filename: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
|
- filename: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
|
||||||
sha256: 830d4858aa11a654f82f69fa40dee819edf9ecf54213057648304eb84b8dd5eb
|
sha256: 830d4858aa11a654f82f69fa40dee819edf9ecf54213057648304eb84b8dd5eb
|
||||||
uri: huggingface://Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix/Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
|
uri: huggingface://Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix/Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
|
||||||
|
- !!merge <<: *llama31
|
||||||
|
name: "llama-3.1-8b-arliai-rpmax-v1.1"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1
|
||||||
|
- https://huggingface.co/bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.1-GGUF
|
||||||
|
description: |
|
||||||
|
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and is capable of understanding and acting appropriately to any characters or situations.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
sha256: 0a601c7341228d9160332965298d799369a1dc2b7080771fb8051bdeb556b30c
|
||||||
|
uri: huggingface://bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.1-GGUF/Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
- &deepseek
|
- &deepseek
|
||||||
## Deepseek
|
## Deepseek
|
||||||
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
|
||||||
@ -1278,6 +1657,21 @@
|
|||||||
- filename: Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
|
- filename: Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
|
||||||
sha256: cf3465c183bf4ecbccd1b6b480f687e0160475b04c87e2f1e5ebc8baa0f4c7aa
|
sha256: cf3465c183bf4ecbccd1b6b480f687e0160475b04c87e2f1e5ebc8baa0f4c7aa
|
||||||
uri: huggingface://bartowski/Pantheon-RP-1.6-12b-Nemo-GGUF/Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
|
uri: huggingface://bartowski/Pantheon-RP-1.6-12b-Nemo-GGUF/Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *mistral03
|
||||||
|
name: "acolyte-22b-i1"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/6569a4ed2419be6072890cf8/3dcGMcrWK2-2vQh9QBt3o.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/rAIfle/Acolyte-22B
|
||||||
|
- https://huggingface.co/mradermacher/Acolyte-22B-i1-GGUF
|
||||||
|
description: |
|
||||||
|
LoRA of a bunch of random datasets on top of Mistral-Small-Instruct-2409, then SLERPed onto base at 0.5. Decent enough for its size. Check the LoRA for dataset info.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Acolyte-22B.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Acolyte-22B.i1-Q4_K_M.gguf
|
||||||
|
sha256: 5a454405b98b6f886e8e4c695488d8ea098162bb8c46f2a7723fc2553c6e2f6e
|
||||||
|
uri: huggingface://mradermacher/Acolyte-22B-i1-GGUF/Acolyte-22B.i1-Q4_K_M.gguf
|
||||||
- !!merge <<: *mistral03
|
- !!merge <<: *mistral03
|
||||||
name: "mn-12b-lyra-v4-iq-imatrix"
|
name: "mn-12b-lyra-v4-iq-imatrix"
|
||||||
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/dVoru83WOpwVjMlgZ_xhA.png
|
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/dVoru83WOpwVjMlgZ_xhA.png
|
||||||
@ -1295,6 +1689,27 @@
|
|||||||
- filename: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
- filename: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
||||||
sha256: 1989123481ca1936c8a2cbe278ff5d1d2b0ae63dbdc838bb36a6d7547b8087b3
|
sha256: 1989123481ca1936c8a2cbe278ff5d1d2b0ae63dbdc838bb36a6d7547b8087b3
|
||||||
uri: huggingface://Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix/MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
uri: huggingface://Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix/MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
||||||
|
- !!merge <<: *mistral03
|
||||||
|
name: "magnusintellectus-12b-v1-i1"
|
||||||
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/66b564058d9afb7a9d5607d5/hUVJI1Qa4tCMrZWMgYkoD.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/GalrionSoftworks/MagnusIntellectus-12B-v1
|
||||||
|
- https://huggingface.co/mradermacher/MagnusIntellectus-12B-v1-i1-GGUF
|
||||||
|
description: |
|
||||||
|
How pleasant, the rocks appear to have made a decent conglomerate. A-.
|
||||||
|
|
||||||
|
MagnusIntellectus is a merge of the following models using LazyMergekit:
|
||||||
|
|
||||||
|
UsernameJustAnother/Nemo-12B-Marlin-v5
|
||||||
|
anthracite-org/magnum-12b-v2
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
|
sha256: c97107983b4edc5b6f2a592d227ca2dd4196e2af3d3bc0fe6b7a8954a1fb5870
|
||||||
|
uri: huggingface://mradermacher/MagnusIntellectus-12B-v1-i1-GGUF/MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
- &mudler
|
- &mudler
|
||||||
### START mudler's LocalAI specific-models
|
### START mudler's LocalAI specific-models
|
||||||
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
||||||
@ -1850,6 +2265,47 @@
|
|||||||
- filename: datagemma-rig-27b-it-Q4_K_M.gguf
|
- filename: datagemma-rig-27b-it-Q4_K_M.gguf
|
||||||
sha256: a6738ffbb49b6c46d220e2793df85c0538e9ac72398e32a0914ee5e55c3096ad
|
sha256: a6738ffbb49b6c46d220e2793df85c0538e9ac72398e32a0914ee5e55c3096ad
|
||||||
uri: huggingface://bartowski/datagemma-rig-27b-it-GGUF/datagemma-rig-27b-it-Q4_K_M.gguf
|
uri: huggingface://bartowski/datagemma-rig-27b-it-GGUF/datagemma-rig-27b-it-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *gemma
|
||||||
|
name: "buddy-2b-v1"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/TheDrummer/Buddy-2B-v1
|
||||||
|
- https://huggingface.co/bartowski/Buddy-2B-v1-GGUF
|
||||||
|
description: |
|
||||||
|
Buddy is designed as an empathetic language model, aimed at fostering introspection, self-reflection, and personal growth through thoughtful conversation. Buddy won't judge and it won't dismiss your concerns. Get some self-care with Buddy.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Buddy-2B-v1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Buddy-2B-v1-Q4_K_M.gguf
|
||||||
|
sha256: 9bd25ed907d1a3c2e07fe09399a9b3aec107d368c29896e2c46facede5b7e3d5
|
||||||
|
uri: huggingface://bartowski/Buddy-2B-v1-GGUF/Buddy-2B-v1-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *gemma
|
||||||
|
name: "gemma-2-9b-arliai-rpmax-v1.1"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/ArliAI/Gemma-2-9B-ArliAI-RPMax-v1.1
|
||||||
|
- https://huggingface.co/bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF
|
||||||
|
description: |
|
||||||
|
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and is capable of understanding and acting appropriately to any characters or situations.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
sha256: 1724aff0ad6f71bf4371d839aca55578f7ec6f030d8d25c0254126088e4c6250
|
||||||
|
uri: huggingface://bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *gemma
|
||||||
|
name: "gemma-2-2b-arliai-rpmax-v1.1"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF
|
||||||
|
description: |
|
||||||
|
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and is capable of understanding and acting appropriately to any characters or situations.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
|
sha256: 89fe35345754d7e9de8d0c0d5bf35b2be9b12a09811b365b712b8b27112f7712
|
||||||
|
uri: huggingface://bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
|
||||||
- &llama3
|
- &llama3
|
||||||
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
|
||||||
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||||
@ -3729,7 +4185,7 @@
|
|||||||
files:
|
files:
|
||||||
- filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf
|
- filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf
|
||||||
sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05
|
sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05
|
||||||
uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf
|
uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf
|
||||||
- !!merge <<: *llama3
|
- !!merge <<: *llama3
|
||||||
name: "llama-3-8b-instruct-mopeymule"
|
name: "llama-3-8b-instruct-mopeymule"
|
||||||
urls:
|
urls:
|
||||||
|
65
gallery/llama3.1-reflective.yaml
Normal file
65
gallery/llama3.1-reflective.yaml
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
---
|
||||||
|
name: "llama3-instruct"
|
||||||
|
|
||||||
|
config_file: |
|
||||||
|
mmap: true
|
||||||
|
cutstrings:
|
||||||
|
- (.*?)</thinking>
|
||||||
|
function:
|
||||||
|
disable_no_action: true
|
||||||
|
grammar:
|
||||||
|
disable: true
|
||||||
|
response_regex:
|
||||||
|
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
|
||||||
|
template:
|
||||||
|
chat_message: |
|
||||||
|
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
|
||||||
|
|
||||||
|
{{ if .FunctionCall -}}
|
||||||
|
Function call:
|
||||||
|
{{ else if eq .RoleName "tool" -}}
|
||||||
|
Function response:
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .Content -}}
|
||||||
|
{{.Content -}}
|
||||||
|
{{ else if .FunctionCall -}}
|
||||||
|
{{ toJson .FunctionCall -}}
|
||||||
|
{{ end -}}
|
||||||
|
<|eot_id|>
|
||||||
|
function: |
|
||||||
|
<|start_header_id|>system<|end_header_id|>
|
||||||
|
|
||||||
|
You have access to the following functions:
|
||||||
|
|
||||||
|
{{range .Functions}}
|
||||||
|
Use the function '{{.Name}}' to '{{.Description}}'
|
||||||
|
{{toJson .Parameters}}
|
||||||
|
{{end}}
|
||||||
|
|
||||||
|
Think very carefully before calling functions.
|
||||||
|
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
|
||||||
|
|
||||||
|
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
|
||||||
|
|
||||||
|
Reminder:
|
||||||
|
- If looking for real time information use relevant functions before falling back to searching on internet
|
||||||
|
- Function calls MUST follow the specified format, start with <function= and end with </function>
|
||||||
|
- Required parameters MUST be specified
|
||||||
|
- Only call one function at a time
|
||||||
|
- Put the entire function call reply on one line
|
||||||
|
<|eot_id|>
|
||||||
|
{{.Input }}
|
||||||
|
<|start_header_id|>assistant<|end_header_id|>
|
||||||
|
chat: |
|
||||||
|
{{.Input }}
|
||||||
|
<|start_header_id|>assistant<|end_header_id|>
|
||||||
|
<thinking>
|
||||||
|
completion: |
|
||||||
|
{{.Input}}
|
||||||
|
context_size: 8192
|
||||||
|
f16: true
|
||||||
|
stopwords:
|
||||||
|
- <|im_end|>
|
||||||
|
- <dummy32000>
|
||||||
|
- "<|eot_id|>"
|
||||||
|
- <|end_of_text|>
|
5
go.mod
5
go.mod
@ -1,8 +1,8 @@
|
|||||||
module github.com/mudler/LocalAI
|
module github.com/mudler/LocalAI
|
||||||
|
|
||||||
go 1.22.0
|
go 1.23
|
||||||
|
|
||||||
toolchain go1.22.4
|
toolchain go1.23.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
dario.cat/mergo v1.0.0
|
dario.cat/mergo v1.0.0
|
||||||
@ -74,6 +74,7 @@ require (
|
|||||||
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
|
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
|
||||||
cloud.google.com/go/compute/metadata v0.3.0 // indirect
|
cloud.google.com/go/compute/metadata v0.3.0 // indirect
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||||
|
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 // indirect
|
||||||
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
|
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
|
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
|
||||||
|
2
go.sum
2
go.sum
@ -110,6 +110,8 @@ github.com/creachadair/otp v0.4.2 h1:ngNMaD6Tzd7UUNRFyed7ykZFn/Wr5sSs5ffqZWm9pu8
|
|||||||
github.com/creachadair/otp v0.4.2/go.mod h1:DqV9hJyUbcUme0pooYfiFvvMe72Aua5sfhNzwfZvk40=
|
github.com/creachadair/otp v0.4.2/go.mod h1:DqV9hJyUbcUme0pooYfiFvvMe72Aua5sfhNzwfZvk40=
|
||||||
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
|
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
|
||||||
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
|
||||||
|
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
|
||||||
|
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
@ -31,7 +31,11 @@ const (
|
|||||||
|
|
||||||
type URI string
|
type URI string
|
||||||
|
|
||||||
func (uri URI) DownloadAndUnmarshal(basePath string, f func(url string, i []byte) error) error {
|
func (uri URI) DownloadWithCallback(basePath string, f func(url string, i []byte) error) error {
|
||||||
|
return uri.DownloadWithAuthorizationAndCallback(basePath, "", f)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorization string, f func(url string, i []byte) error) error {
|
||||||
url := uri.ResolveURL()
|
url := uri.ResolveURL()
|
||||||
|
|
||||||
if strings.HasPrefix(url, LocalPrefix) {
|
if strings.HasPrefix(url, LocalPrefix) {
|
||||||
@ -41,7 +45,6 @@ func (uri URI) DownloadAndUnmarshal(basePath string, f func(url string, i []byte
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// ???
|
|
||||||
resolvedBasePath, err := filepath.EvalSymlinks(basePath)
|
resolvedBasePath, err := filepath.EvalSymlinks(basePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -63,7 +66,16 @@ func (uri URI) DownloadAndUnmarshal(basePath string, f func(url string, i []byte
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Send a GET request to the URL
|
// Send a GET request to the URL
|
||||||
response, err := http.Get(url)
|
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if authorization != "" {
|
||||||
|
req.Header.Add("Authorization", authorization)
|
||||||
|
}
|
||||||
|
|
||||||
|
response, err := http.DefaultClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,7 @@ var _ = Describe("Gallery API tests", func() {
|
|||||||
It("parses github with a branch", func() {
|
It("parses github with a branch", func() {
|
||||||
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml")
|
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml")
|
||||||
Expect(
|
Expect(
|
||||||
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
|
uri.DownloadWithCallback("", func(url string, i []byte) error {
|
||||||
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
@ -21,7 +21,7 @@ var _ = Describe("Gallery API tests", func() {
|
|||||||
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml@main")
|
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml@main")
|
||||||
|
|
||||||
Expect(
|
Expect(
|
||||||
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
|
uri.DownloadWithCallback("", func(url string, i []byte) error {
|
||||||
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
@ -30,7 +30,7 @@ var _ = Describe("Gallery API tests", func() {
|
|||||||
It("parses github with urls", func() {
|
It("parses github with urls", func() {
|
||||||
uri := URI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")
|
uri := URI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")
|
||||||
Expect(
|
Expect(
|
||||||
uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
|
uri.DownloadWithCallback("", func(url string, i []byte) error {
|
||||||
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
|
17
pkg/model/filters.go
Normal file
17
pkg/model/filters.go
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
package model
|
||||||
|
|
||||||
|
import (
|
||||||
|
process "github.com/mudler/go-processmanager"
|
||||||
|
)
|
||||||
|
|
||||||
|
type GRPCProcessFilter = func(id string, p *process.Process) bool
|
||||||
|
|
||||||
|
func all(_ string, _ *process.Process) bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func allExcept(s string) GRPCProcessFilter {
|
||||||
|
return func(id string, p *process.Process) bool {
|
||||||
|
return id != s
|
||||||
|
}
|
||||||
|
}
|
@ -304,23 +304,24 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||||
}
|
}
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
|
process, err := ml.startProcess(uri, o.model, serverAddress)
|
||||||
|
if err != nil {
|
||||||
log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
|
log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service Started")
|
log.Debug().Msgf("GRPC Service Started")
|
||||||
|
|
||||||
client = NewModel(serverAddress)
|
client = NewModel(modelName, serverAddress, process)
|
||||||
} else {
|
} else {
|
||||||
log.Debug().Msg("external backend is uri")
|
log.Debug().Msg("external backend is uri")
|
||||||
// address
|
// address
|
||||||
client = NewModel(uri)
|
client = NewModel(modelName, uri, nil)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
grpcProcess := backendPath(o.assetDir, backend)
|
grpcProcess := backendPath(o.assetDir, backend)
|
||||||
if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
|
if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
|
||||||
return nil, fmt.Errorf("grpc process not found in assetdir: %s", err.Error())
|
return nil, fmt.Errorf("refering to a backend not in asset dir: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
if autoDetect {
|
if autoDetect {
|
||||||
@ -332,7 +333,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
|
|
||||||
// Check if the file exists
|
// Check if the file exists
|
||||||
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
|
||||||
return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
|
return nil, fmt.Errorf("backend not found: %s", grpcProcess)
|
||||||
}
|
}
|
||||||
|
|
||||||
serverAddress, err := getFreeAddress()
|
serverAddress, err := getFreeAddress()
|
||||||
@ -346,15 +347,18 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
|
args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
|
||||||
|
|
||||||
// Make sure the process is executable in any circumstance
|
// Make sure the process is executable in any circumstance
|
||||||
if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
|
process, err := ml.startProcess(grpcProcess, o.model, serverAddress, args...)
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service Started")
|
log.Debug().Msgf("GRPC Service Started")
|
||||||
|
|
||||||
client = NewModel(serverAddress)
|
client = NewModel(modelName, serverAddress, process)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Debug().Msgf("Wait for the service to start up")
|
||||||
|
|
||||||
// Wait for the service to start up
|
// Wait for the service to start up
|
||||||
ready := false
|
ready := false
|
||||||
for i := 0; i < o.grpcAttempts; i++ {
|
for i := 0; i < o.grpcAttempts; i++ {
|
||||||
@ -372,6 +376,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
|
|
||||||
if !ready {
|
if !ready {
|
||||||
log.Debug().Msgf("GRPC Service NOT ready")
|
log.Debug().Msgf("GRPC Service NOT ready")
|
||||||
|
ml.deleteProcess(o.model)
|
||||||
return nil, fmt.Errorf("grpc service not ready")
|
return nil, fmt.Errorf("grpc service not ready")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,9 +388,11 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
|
|
||||||
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
|
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
ml.deleteProcess(o.model)
|
||||||
return nil, fmt.Errorf("could not load model: %w", err)
|
return nil, fmt.Errorf("could not load model: %w", err)
|
||||||
}
|
}
|
||||||
if !res.Success {
|
if !res.Success {
|
||||||
|
ml.deleteProcess(o.model)
|
||||||
return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
|
return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -413,13 +420,10 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
|
|||||||
}
|
}
|
||||||
|
|
||||||
if o.singleActiveBackend {
|
if o.singleActiveBackend {
|
||||||
ml.mu.Lock()
|
|
||||||
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
||||||
err := ml.StopAllExcept(o.model)
|
err := ml.StopGRPC(allExcept(o.model))
|
||||||
ml.mu.Unlock()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
|
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -444,13 +448,10 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
|
|||||||
func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
||||||
o := NewOptions(opts...)
|
o := NewOptions(opts...)
|
||||||
|
|
||||||
ml.mu.Lock()
|
|
||||||
|
|
||||||
// Return earlier if we have a model already loaded
|
// Return earlier if we have a model already loaded
|
||||||
// (avoid looping through all the backends)
|
// (avoid looping through all the backends)
|
||||||
if m := ml.CheckIsLoaded(o.model); m != nil {
|
if m := ml.CheckIsLoaded(o.model); m != nil {
|
||||||
log.Debug().Msgf("Model '%s' already loaded", o.model)
|
log.Debug().Msgf("Model '%s' already loaded", o.model)
|
||||||
ml.mu.Unlock()
|
|
||||||
|
|
||||||
return m.GRPC(o.parallelRequests, ml.wd), nil
|
return m.GRPC(o.parallelRequests, ml.wd), nil
|
||||||
}
|
}
|
||||||
@ -458,12 +459,11 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
|||||||
// If we can have only one backend active, kill all the others (except external backends)
|
// If we can have only one backend active, kill all the others (except external backends)
|
||||||
if o.singleActiveBackend {
|
if o.singleActiveBackend {
|
||||||
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
||||||
err := ml.StopAllExcept(o.model)
|
err := ml.StopGRPC(allExcept(o.model))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ml.mu.Unlock()
|
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
|
@ -13,7 +13,6 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
|
||||||
process "github.com/mudler/go-processmanager"
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -24,7 +23,6 @@ type ModelLoader struct {
|
|||||||
ModelPath string
|
ModelPath string
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
models map[string]*Model
|
models map[string]*Model
|
||||||
grpcProcesses map[string]*process.Process
|
|
||||||
templates *templates.TemplateCache
|
templates *templates.TemplateCache
|
||||||
wd *WatchDog
|
wd *WatchDog
|
||||||
}
|
}
|
||||||
@ -34,7 +32,6 @@ func NewModelLoader(modelPath string) *ModelLoader {
|
|||||||
ModelPath: modelPath,
|
ModelPath: modelPath,
|
||||||
models: make(map[string]*Model),
|
models: make(map[string]*Model),
|
||||||
templates: templates.NewTemplateCache(modelPath),
|
templates: templates.NewTemplateCache(modelPath),
|
||||||
grpcProcesses: make(map[string]*process.Process),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nml
|
return nml
|
||||||
@ -69,6 +66,8 @@ var knownModelsNameSuffixToSkip []string = []string{
|
|||||||
".tar.gz",
|
".tar.gz",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const retryTimeout = time.Duration(2 * time.Minute)
|
||||||
|
|
||||||
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
|
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
|
||||||
files, err := os.ReadDir(ml.ModelPath)
|
files, err := os.ReadDir(ml.ModelPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -103,22 +102,19 @@ FILE:
|
|||||||
return models, nil
|
return models, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) ListModels() []*Model {
|
func (ml *ModelLoader) ListModels() []Model {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
|
|
||||||
models := []*Model{}
|
models := []Model{}
|
||||||
for _, model := range ml.models {
|
for _, model := range ml.models {
|
||||||
models = append(models, model)
|
models = append(models, *model)
|
||||||
}
|
}
|
||||||
|
|
||||||
return models
|
return models
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) {
|
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) {
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
// Check if we already have a loaded model
|
// Check if we already have a loaded model
|
||||||
if model := ml.CheckIsLoaded(modelName); model != nil {
|
if model := ml.CheckIsLoaded(modelName); model != nil {
|
||||||
return model, nil
|
return model, nil
|
||||||
@ -128,6 +124,8 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
|
|||||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||||
|
|
||||||
|
ml.mu.Lock()
|
||||||
|
defer ml.mu.Unlock()
|
||||||
model, err := loader(modelName, modelFile)
|
model, err := loader(modelName, modelFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -145,19 +143,28 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
|
|||||||
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
|
model, ok := ml.models[modelName]
|
||||||
return ml.stopModel(modelName)
|
if !ok {
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) stopModel(modelName string) error {
|
|
||||||
defer ml.deleteProcess(modelName)
|
|
||||||
if _, ok := ml.models[modelName]; !ok {
|
|
||||||
return fmt.Errorf("model %s not found", modelName)
|
return fmt.Errorf("model %s not found", modelName)
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
|
retries := 1
|
||||||
|
for model.GRPC(false, ml.wd).IsBusy() {
|
||||||
|
log.Debug().Msgf("%s busy. Waiting.", modelName)
|
||||||
|
dur := time.Duration(retries*2) * time.Second
|
||||||
|
if dur > retryTimeout {
|
||||||
|
dur = retryTimeout
|
||||||
|
}
|
||||||
|
time.Sleep(dur)
|
||||||
|
retries++
|
||||||
|
}
|
||||||
|
|
||||||
|
return ml.deleteProcess(modelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
||||||
|
ml.mu.Lock()
|
||||||
|
defer ml.mu.Unlock()
|
||||||
m, ok := ml.models[s]
|
m, ok := ml.models[s]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil
|
return nil
|
||||||
@ -174,8 +181,8 @@ func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
|||||||
if !alive {
|
if !alive {
|
||||||
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
||||||
log.Warn().Msgf("Deleting the process in order to recreate it")
|
log.Warn().Msgf("Deleting the process in order to recreate it")
|
||||||
process, exists := ml.grpcProcesses[s]
|
process := m.Process()
|
||||||
if !exists {
|
if process == nil {
|
||||||
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
|
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,7 @@ var _ = Describe("ModelLoader", func() {
|
|||||||
|
|
||||||
Context("LoadModel", func() {
|
Context("LoadModel", func() {
|
||||||
It("should load a model and keep it in memory", func() {
|
It("should load a model and keep it in memory", func() {
|
||||||
mockModel = model.NewModel("test.model")
|
mockModel = model.NewModel("foo", "test.model", nil)
|
||||||
|
|
||||||
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
||||||
return mockModel, nil
|
return mockModel, nil
|
||||||
@ -88,7 +88,7 @@ var _ = Describe("ModelLoader", func() {
|
|||||||
|
|
||||||
Context("ShutdownModel", func() {
|
Context("ShutdownModel", func() {
|
||||||
It("should shutdown a loaded model", func() {
|
It("should shutdown a loaded model", func() {
|
||||||
mockModel = model.NewModel("test.model")
|
mockModel = model.NewModel("foo", "test.model", nil)
|
||||||
|
|
||||||
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
||||||
return mockModel, nil
|
return mockModel, nil
|
||||||
|
@ -1,18 +1,32 @@
|
|||||||
package model
|
package model
|
||||||
|
|
||||||
import grpc "github.com/mudler/LocalAI/pkg/grpc"
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
process "github.com/mudler/go-processmanager"
|
||||||
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
|
ID string `json:"id"`
|
||||||
address string
|
address string
|
||||||
client grpc.Backend
|
client grpc.Backend
|
||||||
|
process *process.Process
|
||||||
|
sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewModel(address string) *Model {
|
func NewModel(ID, address string, process *process.Process) *Model {
|
||||||
return &Model{
|
return &Model{
|
||||||
|
ID: ID,
|
||||||
address: address,
|
address: address,
|
||||||
|
process: process,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Model) Process() *process.Process {
|
||||||
|
return m.process
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
||||||
if m.client != nil {
|
if m.client != nil {
|
||||||
return m.client
|
return m.client
|
||||||
@ -23,6 +37,8 @@ func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
|||||||
enableWD = true
|
enableWD = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.Lock()
|
||||||
|
defer m.Unlock()
|
||||||
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
|
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
|
||||||
return m.client
|
return m.client
|
||||||
}
|
}
|
||||||
|
@ -9,49 +9,30 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/hpcloud/tail"
|
"github.com/hpcloud/tail"
|
||||||
process "github.com/mudler/go-processmanager"
|
process "github.com/mudler/go-processmanager"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (ml *ModelLoader) StopAllExcept(s string) error {
|
|
||||||
return ml.StopGRPC(func(id string, p *process.Process) bool {
|
|
||||||
if id != s {
|
|
||||||
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
|
|
||||||
log.Debug().Msgf("%s busy. Waiting.", id)
|
|
||||||
time.Sleep(2 * time.Second)
|
|
||||||
}
|
|
||||||
log.Debug().Msgf("[single-backend] Stopping %s", id)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) deleteProcess(s string) error {
|
func (ml *ModelLoader) deleteProcess(s string) error {
|
||||||
if _, exists := ml.grpcProcesses[s]; exists {
|
if m, exists := ml.models[s]; exists {
|
||||||
if err := ml.grpcProcesses[s].Stop(); err != nil {
|
process := m.Process()
|
||||||
log.Error().Err(err).Msgf("(deleteProcess) error while deleting grpc process %s", s)
|
if process != nil {
|
||||||
|
if err := process.Stop(); err != nil {
|
||||||
|
log.Error().Err(err).Msgf("(deleteProcess) error while deleting process %s", s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete(ml.grpcProcesses, s)
|
|
||||||
delete(ml.models, s)
|
delete(ml.models, s)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type GRPCProcessFilter = func(id string, p *process.Process) bool
|
|
||||||
|
|
||||||
func includeAllProcesses(_ string, _ *process.Process) bool {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
||||||
var err error = nil
|
var err error = nil
|
||||||
for k, p := range ml.grpcProcesses {
|
for k, m := range ml.models {
|
||||||
if filter(k, p) {
|
if filter(k, m.Process()) {
|
||||||
e := ml.deleteProcess(k)
|
e := ml.ShutdownModel(k)
|
||||||
err = errors.Join(err, e)
|
err = errors.Join(err, e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -59,21 +40,26 @@ func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) StopAllGRPC() error {
|
func (ml *ModelLoader) StopAllGRPC() error {
|
||||||
return ml.StopGRPC(includeAllProcesses)
|
return ml.StopGRPC(all)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
|
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
|
||||||
p, exists := ml.grpcProcesses[id]
|
ml.mu.Lock()
|
||||||
|
defer ml.mu.Unlock()
|
||||||
|
p, exists := ml.models[id]
|
||||||
if !exists {
|
if !exists {
|
||||||
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
||||||
}
|
}
|
||||||
return strconv.Atoi(p.PID)
|
if p.Process() == nil {
|
||||||
|
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
||||||
|
}
|
||||||
|
return strconv.Atoi(p.Process().PID)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) error {
|
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) (*process.Process, error) {
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := os.Chmod(grpcProcess, 0700); err != nil {
|
if err := os.Chmod(grpcProcess, 0700); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)
|
log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)
|
||||||
@ -82,7 +68,7 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
|
|
||||||
workDir, err := filepath.Abs(filepath.Dir(grpcProcess))
|
workDir, err := filepath.Abs(filepath.Dir(grpcProcess))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcControlProcess := process.New(
|
grpcControlProcess := process.New(
|
||||||
@ -98,10 +84,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
ml.wd.AddAddressModelMap(serverAddress, id)
|
ml.wd.AddAddressModelMap(serverAddress, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.grpcProcesses[id] = grpcControlProcess
|
|
||||||
|
|
||||||
if err := grpcControlProcess.Run(); err != nil {
|
if err := grpcControlProcess.Run(); err != nil {
|
||||||
return err
|
return grpcControlProcess, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
|
log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
|
||||||
@ -135,5 +119,5 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return nil
|
return grpcControlProcess, nil
|
||||||
}
|
}
|
||||||
|
@ -13,14 +13,8 @@ var base64DownloadClient http.Client = http.Client{
|
|||||||
Timeout: 30 * time.Second,
|
Timeout: 30 * time.Second,
|
||||||
}
|
}
|
||||||
|
|
||||||
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
// GetContentURIAsBase64 checks if the string is an URL, if it's an URL downloads the content in memory encodes it in base64 and returns the base64 string, otherwise returns the string by stripping base64 data headers
|
||||||
// encodes it in base64 and returns the base64 string
|
func GetContentURIAsBase64(s string) (string, error) {
|
||||||
|
|
||||||
// This may look weird down in pkg/utils while it is currently only used in core/config
|
|
||||||
//
|
|
||||||
// but I believe it may be useful for MQTT as well in the near future, so I'm
|
|
||||||
// extracting it while I'm thinking of it.
|
|
||||||
func GetImageURLAsBase64(s string) (string, error) {
|
|
||||||
if strings.HasPrefix(s, "http") {
|
if strings.HasPrefix(s, "http") {
|
||||||
// download the image
|
// download the image
|
||||||
resp, err := base64DownloadClient.Get(s)
|
resp, err := base64DownloadClient.Get(s)
|
||||||
|
@ -10,20 +10,20 @@ var _ = Describe("utils/base64 tests", func() {
|
|||||||
It("GetImageURLAsBase64 can strip jpeg data url prefixes", func() {
|
It("GetImageURLAsBase64 can strip jpeg data url prefixes", func() {
|
||||||
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
|
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
|
||||||
input := ""
|
input := ""
|
||||||
b64, err := GetImageURLAsBase64(input)
|
b64, err := GetContentURIAsBase64(input)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(b64).To(Equal("FOO"))
|
Expect(b64).To(Equal("FOO"))
|
||||||
})
|
})
|
||||||
It("GetImageURLAsBase64 can strip png data url prefixes", func() {
|
It("GetImageURLAsBase64 can strip png data url prefixes", func() {
|
||||||
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
|
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
|
||||||
input := ""
|
input := ""
|
||||||
b64, err := GetImageURLAsBase64(input)
|
b64, err := GetContentURIAsBase64(input)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(b64).To(Equal("BAR"))
|
Expect(b64).To(Equal("BAR"))
|
||||||
})
|
})
|
||||||
It("GetImageURLAsBase64 returns an error for bogus data", func() {
|
It("GetImageURLAsBase64 returns an error for bogus data", func() {
|
||||||
input := "FOO"
|
input := "FOO"
|
||||||
b64, err := GetImageURLAsBase64(input)
|
b64, err := GetContentURIAsBase64(input)
|
||||||
Expect(b64).To(Equal(""))
|
Expect(b64).To(Equal(""))
|
||||||
Expect(err).ToNot(BeNil())
|
Expect(err).ToNot(BeNil())
|
||||||
Expect(err).To(MatchError("not valid string"))
|
Expect(err).To(MatchError("not valid string"))
|
||||||
@ -31,7 +31,7 @@ var _ = Describe("utils/base64 tests", func() {
|
|||||||
It("GetImageURLAsBase64 can actually download images and calculates something", func() {
|
It("GetImageURLAsBase64 can actually download images and calculates something", func() {
|
||||||
// This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before...
|
// This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before...
|
||||||
input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg"
|
input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg"
|
||||||
b64, err := GetImageURLAsBase64(input)
|
b64, err := GetContentURIAsBase64(input)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(b64).ToNot(BeNil())
|
Expect(b64).ToNot(BeNil())
|
||||||
})
|
})
|
||||||
|
@ -972,6 +972,14 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"model.Model": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"openai.Assistant": {
|
"openai.Assistant": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1394,6 +1402,12 @@ const docTemplate = `{
|
|||||||
"description": "The message role",
|
"description": "The message role",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"string_audios": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"string_content": {
|
"string_content": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@ -1403,6 +1417,12 @@ const docTemplate = `{
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"string_videos": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"tool_calls": {
|
"tool_calls": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@ -1670,6 +1690,12 @@ const docTemplate = `{
|
|||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"loaded_models": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/model.Model"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -965,6 +965,14 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"model.Model": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"openai.Assistant": {
|
"openai.Assistant": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@ -1387,6 +1395,12 @@
|
|||||||
"description": "The message role",
|
"description": "The message role",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"string_audios": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"string_content": {
|
"string_content": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@ -1396,6 +1410,12 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"string_videos": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"tool_calls": {
|
"tool_calls": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@ -1663,6 +1683,12 @@
|
|||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"loaded_models": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/model.Model"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -168,6 +168,11 @@ definitions:
|
|||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
|
model.Model:
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
openai.Assistant:
|
openai.Assistant:
|
||||||
properties:
|
properties:
|
||||||
created:
|
created:
|
||||||
@ -453,12 +458,20 @@ definitions:
|
|||||||
role:
|
role:
|
||||||
description: The message role
|
description: The message role
|
||||||
type: string
|
type: string
|
||||||
|
string_audios:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
string_content:
|
string_content:
|
||||||
type: string
|
type: string
|
||||||
string_images:
|
string_images:
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
string_videos:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
tool_calls:
|
tool_calls:
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.ToolCall'
|
$ref: '#/definitions/schema.ToolCall'
|
||||||
@ -644,6 +657,10 @@ definitions:
|
|||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
loaded_models:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/model.Model'
|
||||||
|
type: array
|
||||||
type: object
|
type: object
|
||||||
schema.TTSRequest:
|
schema.TTSRequest:
|
||||||
description: TTS request body
|
description: TTS request body
|
||||||
|
@ -171,7 +171,7 @@ var _ = Describe("E2E test", func() {
|
|||||||
})
|
})
|
||||||
Context("vision", func() {
|
Context("vision", func() {
|
||||||
It("correctly", func() {
|
It("correctly", func() {
|
||||||
model := "gpt-4-vision-preview"
|
model := "gpt-4o"
|
||||||
resp, err := client.CreateChatCompletion(context.TODO(),
|
resp, err := client.CreateChatCompletion(context.TODO(),
|
||||||
openai.ChatCompletionRequest{
|
openai.ChatCompletionRequest{
|
||||||
Model: model, Messages: []openai.ChatCompletionMessage{
|
Model: model, Messages: []openai.ChatCompletionMessage{
|
||||||
|
Loading…
Reference in New Issue
Block a user