Mirror of https://github.com/mudler/LocalAI.git (synced 2024-12-18 20:27:57 +00:00)
Commit abc9360dc6: feat(aio): entrypoint, update workflows (#1872)
Parent: 743095b7d8
.github/workflows/image.yml (5 changes)

@@ -26,6 +26,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
+      aio: ${{ matrix.aio }}
       makeflags: "-j3"
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -86,6 +87,7 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
+          aio: "-aio-gpu-nvidia-cuda-11"
         - build-type: 'cublas'
           cuda-major-version: "12"
           cuda-minor-version: "1"
@@ -96,6 +98,7 @@ jobs:
           image-type: 'extras'
           runs-on: 'arc-runner-set'
           base-image: "ubuntu:22.04"
+          aio: "-aio-gpu-nvidia-cuda-12"
         - build-type: ''
           #platforms: 'linux/amd64,linux/arm64'
           platforms: 'linux/amd64'
@@ -199,6 +202,7 @@ jobs:
       cuda-minor-version: ${{ matrix.cuda-minor-version }}
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
+      aio: ${{ matrix.aio }}
       base-image: ${{ matrix.base-image }}
       makeflags: "-j3"
     secrets:
@@ -217,6 +221,7 @@ jobs:
           image-type: 'core'
           base-image: "ubuntu:22.04"
           runs-on: 'ubuntu-latest'
+          aio: "-aio-cpu"
         - build-type: 'cublas'
           cuda-major-version: "11"
           cuda-minor-version: "7"
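Net effect: each matrix leg now carries an aio value that image.yml forwards to the reusable build workflow as a tag suffix for the all-in-one variant; legs that omit it inherit the empty-string default declared below, and an empty value skips every AIO step, since they are all guarded by if: inputs.aio != ''.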
.github/workflows/image_build.yml (68 changes)

@@ -51,6 +51,11 @@ on:
       required: false
       default: ''
       type: string
+    aio:
+      description: 'AIO Image Name'
+      required: false
+      default: ''
+      type: string
   secrets:
     dockerUsername:
       required: true
@@ -129,7 +134,30 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.tag-suffix }}
+      - name: Docker meta AIO (quay.io)
+        if: inputs.aio != ''
+        id: meta_aio
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            quay.io/go-skynet/local-ai
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+          flavor: |
+            suffix=${{ inputs.aio }}
+      - name: Docker meta AIO (dockerhub)
+        if: inputs.aio != ''
+        id: meta_aio_dockerhub
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            localai/localai
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+          flavor: |
+            suffix=${{ inputs.aio }}
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:
@@ -172,6 +200,44 @@ jobs:
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+      -
+        name: Inspect image
+        if: github.event_name != 'pull_request'
+        run: |
+          docker pull localai/localai:${{ steps.meta.outputs.version }}
+          docker image inspect localai/localai:${{ steps.meta.outputs.version }}
+          docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+          docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+      - name: Build and push AIO image
+        if: inputs.aio != ''
+        uses: docker/build-push-action@v5
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+          context: .
+          file: ./Dockerfile.aio
+          platforms: ${{ inputs.platforms }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta_aio.outputs.tags }}
+          labels: ${{ steps.meta_aio.outputs.labels }}
+      - name: Build and push AIO image (dockerhub)
+        if: inputs.aio != ''
+        uses: docker/build-push-action@v5
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            BASE_IMAGE=localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }}
+          context: .
+          file: ./Dockerfile.aio
+          platforms: ${{ inputs.platforms }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
+          labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
+      - name: job summary(AIO)
+        if: inputs.aio != ''
+        run: |
+          echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
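For tag naming: docker/metadata-action appends the flavor suffix to each generated tag, so, assuming a push to a branch named master with aio set to "-aio-cpu", the AIO steps would publish tags like quay.io/go-skynet/local-ai:master-aio-cpu and localai/localai:master-aio-cpu, with semver-based tags on releases. These example tags are derived from the type=ref and type=semver rules above, not stated in the commit itself.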
Dockerfile.aio

@@ -1,9 +1,8 @@
 ARG BASE_IMAGE=ubuntu:22.04
 
 FROM ${BASE_IMAGE}
-ARG SIZE=cpu
-ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml"
 
-COPY aio/${SIZE} /aio-models
+RUN apt-get update && apt-get install -y pciutils && apt-get clean
 
-ENTRYPOINT [ "/build/entrypoint.sh" ]
+COPY aio/ /aio
+ENTRYPOINT [ "/aio/entrypoint.sh" ]
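A minimal run sketch for the resulting image, assuming it was built locally as local-ai-aio (the Makefile default below); port 8080 matches the curl examples in the bundled model configs. The new entrypoint auto-detects a GPU, and SIZE and MODELS may be overridden:

    # Auto-detect: falls back to the cpu model set when no usable GPU is found.
    docker run -p 8080:8080 local-ai-aio

    # Force the cpu profile and load only the embeddings config (hypothetical override).
    docker run -p 8080:8080 -e SIZE=cpu -e MODELS=/aio/cpu/embeddings.yaml local-ai-aio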
Makefile (5 changes)

@@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS)
 
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
-DOCKER_AIO_SIZE?=cpu
 IMAGE_TYPE?=core
 BASE_IMAGE?=ubuntu:22.04
 
@@ -549,11 +548,9 @@ docker:
 		-t $(DOCKER_IMAGE) .
 
 docker-aio:
-	@echo "Building AIO image with size $(DOCKER_AIO_SIZE)"
-	@echo "Building AIO image with base image $(BASE_IMAGE)"
+	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
-		--build-arg SIZE=$(DOCKER_AIO_SIZE) \
 		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
 
 docker-aio-all:
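Invocation stays a one-liner; a sketch under the defaults above (the alternate base tag is illustrative, any prebuilt LocalAI core image should work as BASE_IMAGE):

    # Default: builds on the BASE_IMAGE above (ubuntu:22.04) and tags the result local-ai-aio.
    make docker-aio

    # Layer the AIO bits on top of a prebuilt core image instead (tag is hypothetical).
    make docker-aio BASE_IMAGE=quay.io/go-skynet/local-ai:master DOCKER_AIO_IMAGE=local-ai-aio-master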
aio/cpu/README.md (new file, 5 lines)

## AIO CPU size

Use this image with CPU-only.

Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc).
aio/cpu/embeddings.yaml

@@ -1,13 +1,18 @@
-name: all-minilm-l6-v2
-backend: sentencetransformers
+backend: bert-embeddings
 embeddings: true
+f16: true
+
+gpu_layers: 90
+mmap: true
+name: text-embedding-ada-002
+
 parameters:
-  model: all-MiniLM-L6-v2
+  model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
 
 usage: |
     You can test this model with curl like this:
 
     curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
       "input": "Your text string goes here",
-      "model": "all-minilm-l6-v2"
+      "model": "text-embedding-ada-002"
     }'
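The rename to text-embedding-ada-002 presumably mirrors OpenAI's default embedding model name, so OpenAI-compatible clients pointed at the AIO container can keep their model field unchanged; the switch from sentencetransformers to the C++ bert-embeddings backend follows the size guidance in the README above.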
aio/entrypoint.sh (new executable file, 98 lines)

#!/bin/bash

echo "===> LocalAI All-in-One (AIO) container starting..."

GPU_ACCELERATION=false
GPU_VENDOR=""

function detect_gpu() {
    case "$(uname -s)" in
        Linux)
            if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
                echo "NVIDIA GPU detected"
                # nvidia-smi should be installed in the container
                if nvidia-smi; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=nvidia
                else
                    echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
                echo "AMD GPU detected"
                # Check if ROCm is installed
                if [ -d /opt/rocm ]; then
                    GPU_ACCELERATION=true
                    GPU_VENDOR=amd
                else
                    echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
                fi
            elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
                echo "Intel GPU detected"
                if [ -d /opt/intel ]; then
                    GPU_ACCELERATION=true
                else
                    echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
                fi
            fi
            ;;
        Darwin)
            if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
                echo "Apple Metal supported GPU detected"
                GPU_ACCELERATION=true
                GPU_VENDOR=apple
            fi
            ;;
    esac
}

function detect_gpu_size() {
    if [ "$GPU_ACCELERATION" = true ]; then
        GPU_SIZE=gpu-8g
    fi

    # Attempting to find GPU memory size for NVIDIA GPUs
    if echo "$gpu_model" | grep -iq nvidia; then
        echo "NVIDIA GPU detected. Attempting to find memory size..."
        nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits))
        if [ ! -z "$nvidia_sm" ]; then
            echo "Total GPU Memory: ${nvidia_sm[0]} MiB"
        else
            echo "Unable to determine NVIDIA GPU memory size."
        fi
        # if bigger than 8GB, use 16GB
        #if [ "$nvidia_sm" -gt 8192 ]; then
        #    GPU_SIZE=gpu-16g
        #fi
    else
        echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script."
    fi

    # default to cpu if GPU_SIZE is not set
    if [ -z "$GPU_SIZE" ]; then
        GPU_SIZE=cpu
    fi
}

function check_vars() {
    if [ -z "$MODELS" ]; then
        echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load."
        exit 1
    fi

    if [ -z "$SIZE" ]; then
        echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
        exit 1
    fi
}

detect_gpu
detect_gpu_size

SIZE=${SIZE:-$GPU_SIZE} # default to cpu
MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}

check_vars

echo "Starting LocalAI with the following models: $MODELS"

/build/entrypoint.sh "$@"
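One quirk worth noting: detect_gpu_size tests "$gpu_model", a variable the script never assigns, so the NVIDIA memory probe never fires and the non-NVIDIA message always prints; in practice sizing reduces to gpu-8g whenever acceleration was detected and cpu otherwise, which the commented-out 16 GB branch suggests is a known placeholder.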
aio/gpu-8g/embeddings.yaml

@@ -1,4 +1,4 @@
-name: all-minilm-l6-v2
+name: text-embedding-ada-002
 backend: sentencetransformers
 embeddings: true
 parameters:
@@ -9,5 +9,5 @@ usage: |
 
     curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
       "input": "Your text string goes here",
-      "model": "all-minilm-l6-v2"
+      "model": "text-embedding-ada-002"
     }'
aio/gpu-8g/image-gen.yaml

@@ -4,7 +4,7 @@ parameters:
 backend: diffusers
 step: 25
 f16: true
-cuda: true
 diffusers:
   pipeline_type: StableDiffusionPipeline
   cuda: true