Commit bfdc29d316 by Ettore Di Giacinto (2025-07-02)
fix(gallery): correctly show status for downloading OCI images (#5774)
We can't use the bytes written by mutate.Extract as the current status, as that
count will be larger than the compressed image size. Image manifests make no
guarantee about the artifact type of a layer (it can be compressed or not), so
the layer sizes they report don't match the extracted bytes.

Split the extraction process into two parts, downloading and extracting to a
flattened filesystem, so that we can report the status of each phase
accordingly.

This change also fixes a small inconsistency in detecting installed backends:
detection is now more consistent and checks whether a metadata.json and/or a
path with a `run.sh` file is present.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
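
As a rough sketch of the detection rule described above (illustrative shell only, not the project's actual Go implementation; the backend_installed helper name and directory layout are assumed for illustration):

# a backend directory counts as installed if it contains a
# metadata.json and/or a run.sh
backend_installed() {
  [ -f "$1/metadata.json" ] || [ -f "$1/run.sh" ]
}

# example: backend_installed ./backends/vllm && echo "installed"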
💡 Get help - FAQ 💭 Discussions 💬 Discord 📖 Documentation website

💻 Quickstart 🖼️ Models 🚀 Roadmap 🥽 Demo 🌍 Explorer 🛫 Examples Try on Telegram


LocalAI is the free, Open Source OpenAI alternative. It acts as a drop-in replacement REST API compatible with the OpenAI (and Elevenlabs, Anthropic, ...) API specifications for local AI inferencing. It allows you to run LLMs and to generate images, audio, and more, locally or on-prem with consumer-grade hardware, supporting multiple model families. No GPU is required. It is created and maintained by Ettore Di Giacinto.
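
Because the API is OpenAI-compatible, existing clients can usually be pointed at LocalAI simply by overriding the endpoint. A minimal sketch, assuming an SDK that honors the standard OPENAI_BASE_URL environment variable (the API key value here is a placeholder; LocalAI does not require one by default):

# point an OpenAI SDK at a local LocalAI instance (see the Quickstart below)
export OPENAI_BASE_URL=http://localhost:8080/v1
export OPENAI_API_KEY=sk-placeholder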

📚🆕 Local Stack Family

🆕 LocalAI is now part of a comprehensive suite of AI tools designed to work together:


LocalAGI

A powerful Local AI agent management platform that serves as a drop-in replacement for OpenAI's Responses API, enhanced with advanced agentic capabilities.


LocalRecall

A REST-ful API and knowledge base management system that provides persistent memory and storage capabilities for AI agents.

Screenshots

[Web UI screenshots: the Talk interface, audio generation, the models overview, image generation with flux.1-dev, the chat interface, the home page, login, and the P2P swarm dashboard.]

💻 Quickstart

Run the installer script:

# Basic installation
curl https://localai.io/install.sh | sh

For more installation options, see Installer Options.

Or run with docker:

CPU-only image:

docker run -ti --name local-ai -p 8080:8080 localai/localai:latest

NVIDIA GPU Images:

# CUDA 12.0
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12

# CUDA 11.7
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11

# NVIDIA Jetson (L4T) ARM64
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64

AMD GPU Images (ROCm):

docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas

Intel GPU Images (oneAPI):

# Intel GPU with FP16 support
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f16

# Intel GPU with FP32 support
docker run -ti --name local-ai -p 8080:8080 --device=/dev/dri/card1 --device=/dev/dri/renderD128 localai/localai:latest-gpu-intel-f32

Vulkan GPU Images:

docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-gpu-vulkan

AIO Images (pre-downloaded models):

# CPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu

# NVIDIA CUDA 12 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12

# NVIDIA CUDA 11 version
docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11

# Intel GPU version
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel-f16

# AMD GPU version
docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas

For more information about the AIO images and pre-downloaded models, see Container Documentation.
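
Before loading models, you can check that the container is up (LocalAI exposes health endpoints; note that the AIO images download their models on first start, so readiness can take a while):

# liveness and readiness checks
curl http://localhost:8080/healthz
curl http://localhost:8080/readyz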

To load models:

# From the model gallery (see available models with `local-ai models list`, in the WebUI from the model tab, or visiting https://models.localai.io)
local-ai run llama-3.2-1b-instruct:q4_k_m
# Start LocalAI with the phi-2 model directly from huggingface
local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
# Install and run a model from the Ollama OCI registry
local-ai run ollama://gemma:2b
# Run a model from a configuration file
local-ai run https://gist.githubusercontent.com/.../phi-2.yaml
# Install and run a model from a standard OCI registry (e.g., Docker Hub)
local-ai run oci://localai/phi-2:latest
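
Once a model is loaded, you can query it through the OpenAI-compatible API, for example with the gallery model installed above:

curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama-3.2-1b-instruct:q4_k_m", "messages": [{"role": "user", "content": "How are you?"}]}'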

For more information, see 💻 Getting started

📰 Latest project news

Roadmap items: List of issues

🚀 Features

🔗 Community and integrations

Build and deploy custom containers:

WebUIs:

Model galleries

Other:

🔗 Resources

📖 🎥 Media, Blogs, Social

Citation

If you use this repository or its data in a downstream project, please consider citing it with:

@misc{localai,
  author = {Ettore Di Giacinto},
  title = {LocalAI: The free, Open source OpenAI alternative},
  year = {2023},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/go-skynet/LocalAI}},
}

❤️ Sponsors

Do you find LocalAI useful?

Support the project by becoming a backer or sponsor. Your logo will show up here with a link to your website.

A huge thank you to our generous sponsors, who support this project by covering CI expenses, and to everyone on our Sponsor list:


🌟 Star history

LocalAI Star history Chart

📖 License

LocalAI is a community-driven project created by Ettore Di Giacinto.

MIT - Author Ettore Di Giacinto mudler@localai.io

🙇 Acknowledgements

LocalAI couldn't have been built without the help of great software already available from the community. Thank you!

🤗 Contributors

This is a community project, a special thanks to our contributors! 🤗
