View File

@ -2,9 +2,7 @@
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: mudler
labels: bug, unconfirmed, up-for-grabs
<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

View File

@ -2,9 +2,7 @@
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: mudler
labels: enhancement, up-for-grabs
<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

View File

@ -1,3 +1,6 @@
[submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn
url =
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url =

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023-2024 Ettore Di Giacinto
Copyright (c) 2023-2024 Ettore Di Giacinto (
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -0,0 +1,11 @@
"compilerOptions": {
"baseUrl": ".",
"paths": {
"*": [

@ -1,133 +1,178 @@
# this is a required setting for this theme to appear on
# change this to a value appropriate for you; if your site is served from a subdirectory
# set it like ""
baseURL = ""
languageCode = "en-GB"
contentDir = "content"
enableEmoji = true
enableGitInfo = true # N.B. .GitInfo does not currently function with git submodule content directories
# canonicalization will only be used for the sitemap.xml and index.xml files;
# if set to false, a site served from a subdirectory will generate wrong links
# inside of the above mentioned files; if you serve the page from the servers root
# you are free to set the value to false as recommended by the official Hugo documentation
canonifyURLs = true # true -> all relative URLs would instead be canonicalized using baseURL
# required value to serve this page from a webserver AND the file system;
# if you don't want to serve your page from the file system, you can also set this value
# to false
relativeURLs = true # true -> rewrite all relative URLs to be relative to the current content
# if you set uglyURLs to false, this theme will append 'index.html' to any branch bundle link
# so your page can be also served from the file system; if you don't want that,
# set disableExplicitIndexURLs=true in the [params] section
uglyURLs = false # true -> basic/index.html -> basic.html
defaultContentLanguage = 'en'
# the directory where Hugo reads the themes from; this is specific to your
# installation and most certainly needs be deleted or changed
#themesdir = "../.."
# yeah, well, obviously a mandatory setting for your site, if you want to
# use this theme ;-)
theme = "hugo-theme-relearn"
# the main language of this site; also an automatic pirrrate translation is
# available in this showcase
languageCode = "en"
# make sure your defaultContentLanguage is the first one in the [languages]
# array below, as the theme needs to make assumptions on it
defaultContentLanguage = "en"
# the site's title of this showcase; you should change this ;-)
title = "LocalAI Documentation"
# We disable this for testing the exampleSite; you must do so too
# if you want to use the themes parameter disableGeneratorVersion=true;
# otherwise Hugo will create a generator tag on your home page
disableHugoGeneratorInject = true
# add JSON to the home to support Lunr search; This is a mandatory setting
# for the search functionality
# add PRINT to home, section and page to activate the feature to print whole
# chapters
section = ["HTML", "RSS", "PRINT"]
page = ["HTML", "RSS", "PRINT"]
# if `guessSyntax = true`, there will be no unstyled code even if no language
# was given BUT Mermaid and Math codefences will not work anymore! So this is a
# mandatory setting for your site if you want to use Mermaid or Math codefences
guessSyntax = true
defaultMarkdownHandler = "goldmark"
endLevel = 3
startLevel = 1
unsafe = true #
# [markup.highlight]
# codeFences = false # disables Hugo's default syntax highlighting
# [markup.goldmark.parser]
# [markup.goldmark.parser.attribute]
# block = true
# title = true
# here in this showcase we use our own modified chroma syntax highlighting style
# which is imported in theme-relearn-light.css / theme-relearn-dark.css;
# if you want to use a predefined style instead:
# - remove the following `noClasses`
# - set the following `style` to a predefined style name
# - remove the `@import` of the self-defined chroma stylesheet from your CSS files
# (here eg.: theme-relearn-light.css / theme-relearn-dark.css)
noClasses = false
style = "tango"
# activated for this showcase to use HTML and JavaScript; decide on your own needs;
# if in doubt, remove this line
unsafe = true
# allows `hugo server` to display this showcase in IE11; this is used for testing, as we
# are still supporting IE11 - although with degraded experience; if you don't care about
# `hugo server` or browsers of ancient times, feel free to remove this whole block
for = "**.html"
X-UA-Compatible = "IE=edge"
google_fonts = [
["Inter", "300, 400, 600, 700"],
["Fira Code", "500, 700"]
sans_serif_font = "Inter" # Default is System font
secondary_font = "Inter" # Default is System font
mono_font = "Fira Code" # Default is System font
copyright = "© 2023-2024 Ettore Di Giacinto"
version = true # includes git commit info
github = "mudler/LocalAI" # YOUR_GITHUB_ID or YOUR_GITHUB_URL
twitter = "LocalAI_API" # YOUR_TWITTER_ID
dicord = "uJAeKSAGDy"
# instagram = "colinwilson" # YOUR_INSTAGRAM_ID
rss = true # show rss icon with link
[] # Parameters for the /docs 'template'
logo = ""
logo_text = "LocalAI"
title = "LocalAI documentation" # default html title for documentation pages/sections
pathName = "docs" # path name for documentation site | default "docs"
# themeColor = "cyan" # (optional) - Set theme accent colour. Options include: blue (default), green, red, yellow, emerald, cardinal, magenta, cyan
darkMode = true # enable dark mode option? default false
prism = true # enable syntax highlighting via Prism
prismTheme = "solarized-light" # (optional) - Set theme for PrismJS. Options include: lotusdocs (default), solarized-light, twilight, lucario
# gitinfo
repoURL = "" # Git repository URL for your site [support for GitHub, GitLab, and BitBucket]
repoBranch = "master"
editPage = true # enable 'Edit this page' feature - default false
lastMod = true # enable 'Last modified' date on pages - default false
lastModRelative = true # format 'Last modified' time as relative - default true
sidebarIcons = true # enable sidebar icons? default false
breadcrumbs = true # default is true
backToTop = true # enable back-to-top button? default true
# ToC
toc = true # enable table of contents? default is true
tocMobile = true # enable table of contents in mobile view? default is true
scrollSpy = true # enable scrollspy on ToC? default is true
# front matter
descriptions = true # enable front matter descriptions under content title?
titleIcon = true # enable front matter icon title prefix? default is false
# content navigation
navDesc = true # include front matter descriptions in Prev/Next navigation cards
navDescTrunc = 30 # Number of characters by which to truncate the Prev/Next descriptions
listDescTrunc = 100 # Number of characters by which to truncate the list card description
# Link behaviour
intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false
# extLinkNewTab = false # Open external links in a new Tab? default true
# logoLinkURL = "" # Set a custom URL destination for the top header logo link.
[params.flexsearch] # Parameters for FlexSearch
enabled = true
# tokenize = "full"
# optimize = true
# cache = 100
# minQueryChar = 3 # default is 0 (disabled)
# maxResult = 5 # default is 5
# searchSectionsIndex = []
[params.docsearch] # Parameters for DocSearch
# appID = "" # Algolia Application ID
# apiKey = "" # Algolia Search-Only API (Public) Key
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)
[] # Parameters for Analytics (Google, Plausible)
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)
# []
# enabled = true
# emoticonTpl = true
# eventDest = ["plausible","google"]
# emoticonEventName = "Feedback"
# positiveEventName = "Positive Feedback"
# negativeEventName = "Negative Feedback"
# positiveFormTitle = "What did you like?"
# negativeFormTitle = "What went wrong?"
# successMsg = "Thank you for helping to improve Lotus Docs' documentation!"
# errorMsg = "Sorry! There was an error while attempting to submit your feedback!"
# positiveForm = [
# ["Accurate", "Accurately describes the feature or option."],
# ["Solved my problem", "Helped me resolve an issue."],
# ["Easy to understand", "Easy to follow and comprehend."],
# ["Something else"]
# ]
# negativeForm = [
# ["Inaccurate", "Doesn't accurately describe the feature or option."],
# ["Couldn't find what I was looking for", "Missing important information."],
# ["Hard to understand", "Too complicated or unclear."],
# ["Code sample errors", "One or more code samples are incorrect."],
# ["Something else"]
# ]
name = "Docs"
url = "docs/"
identifier = "docs"
weight = 10
name = "Discord"
url = ""
identifier = "discord"
weight = 20
# showcase of the menu shortcuts; you can use relative URLs linking
# to your content or use fully-qualified URLs to link outside of
# your project
title = "LocalAI documentation"
weight = 1
languageName = "English"
landingPageName = "<i class='fas fa-home'></i> Home"
name = "<i class='fas fa-home'></i> Home"
url = "/"
weight = 1
name = "<i class='fab fa-fw fa-github'></i> GitHub repo"
identifier = "ds"
url = ""
weight = 10
# []
# title = "LocalAI documentation"
# languageName = "Français"
# contentDir = "content/fr"
# weight = 20
# []
# title = "LocalAI documentation"
# languageName = "Deutsch"
# contentDir = "content/de"
# weight = 30
name = "<i class='fas fa-fw fa-camera'></i> Examples"
url = ""
weight = 11
name = "<i class='fas fa-fw fa-images'></i> Model Gallery"
url = ""
weight = 12
name = "<i class='fas fa-fw fa-download'></i> Container images"
url = ""
weight = 20
# name = "<i class='fas fa-fw fa-bullhorn'></i> Credits"
# url = "more/credits/"
# weight = 30
name = "<i class='fas fa-fw fa-tags'></i> Releases"
url = ""
weight = 40
# mounts are only needed in this showcase to access the publicly available screenshots;
# remove this section if you don't need further mounts
replacements = " -> lotusdocs"
source = 'archetypes'
target = 'archetypes'
@ -152,30 +197,11 @@ disableHugoGeneratorInject = true
source = 'static'
target = 'static'
# settings specific to this theme's features; choose to your likings and
# consult this documentation for explanation
editURL = ""
description = "Documentation for LocalAI"
author = "Ettore Di Giacinto"
showVisitedLinks = true
collapsibleMenu = true
disableBreadcrumb = false
disableInlineCopyToClipBoard = true
disableNextPrev = false
disableLandingPageButton = true
breadcrumbSeparator = ">"
titleSeparator = "::"
themeVariant = [ "auto", "relearn-bright", "relearn-light", "relearn-dark", "learn", "neon", "blue", "green", "red" ]
themeVariantAuto = [ "relearn-light", "relearn-dark" ]
disableSeoHiddenPages = true
# this is to index search for your native language in other languages, too (eg.
# pir in this showcase)
additionalContentLanguage = [ "en" ]
# this is for the stylesheet generator to allow for interactivity in Mermaid
# graphs; you usually will not need it and you should remove this for
# security reasons
mermaidInitialize = "{ \"securityLevel\": \"loose\" }"
mermaidZoom = true
# uncomment line below for temporary local development of module
# or when using a 'theme' as a git submodule
path = ""
disable = false
path = ""
disable = false

@ -1,37 +0,0 @@
disableToc = false
title = "Development documentation"
weight = 7
{{% notice note %}}
This section is for developers and contributors. If you are looking for the user documentation, this is not the right place!
{{% /notice %}}
This section will collect how-to, notes and development documentation
## Contributing
We use conventional commits and semantic versioning. Please follow the [conventional commits]( specification when writing commit messages.
## Creating a gRPC backend
LocalAI backends are `gRPC` servers.
In order to create a new backend you need:
- If there are changes required to the protobuf code, modify the [proto]( file and re-generate the code with `make protogen`.
- Modify the `Makefile` to add your new backend and re-generate the client code with `make protogen` if necessary.
- Create a new `gRPC` server in `extra/grpc` if it's not written in go: [link](, and create the specific implementation.
- Golang `gRPC` servers should be added in the [pkg/backend]( directory given their type. See [piper]( as an example.
- Golang servers need a respective `cmd/grpc` binary that must be created too, see also [cmd/grpc/piper]( as an example, update also the Makefile accordingly to build the binary during build time.
- Update the Dockerfile: if the backend is written in another language, update the `Dockerfile` default *EXTERNAL_GRPC_BACKENDS* variable by listing the new binary [link](
Once you are done, you can either re-build `LocalAI` with your backend or you can try it out by running the `gRPC` server manually and specifying the host and IP to LocalAI with `--external-grpc-backends` or using (`EXTERNAL_GRPC_BACKENDS` environment variable, comma separated list of `name:host:port` tuples, e.g. `my-awesome-backend:host:port`):
./local-ai --debug --external-grpc-backends "my-awesome-backend:host:port" ...

@ -0,0 +1,11 @@
weight: 20
title: "Advanced"
description: "Advanced usage"
icon: science
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00
draft: false
images: []

@ -1,8 +1,9 @@
disableToc = false
title = "Advanced"
weight = 6
title = "Advanced usage"
weight = 21
url = '/advanced'
### Advanced configuration with YAML files
@ -309,7 +310,7 @@ prompt_cache_all: true
By default LocalAI will try to autoload the model by trying all the backends. This might work for most of models, but some of the backends are NOT configured to autoload.
The available backends are listed in the [model compatibility table]({{%relref "model-compatibility" %}}).
The available backends are listed in the [model compatibility table]({{%relref "docs/reference/compatibility-table" %}}).
In order to specify a backend for your models, create a model config file in your `models` directory specifying the backend:
@ -343,6 +344,19 @@ Or a remote URI:
./local-ai --debug --external-grpc-backends "my-awesome-backend:host:port"
For example, to start vllm manually after compiling LocalAI (also assuming running the command from the root of the repository):
./local-ai --external-grpc-backends "vllm:$PWD/backend/python/vllm/"
Note that it is first necessary to create the conda environment with:
make -C backend/python/vllm
### Environment variables
When LocalAI runs in a container,
@ -419,11 +433,11 @@ RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/"
{{% notice note %}}
{{% alert note %}}
You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
{{% /notice %}}
{{% /alert %}}
#### In runtime

@ -2,12 +2,12 @@
disableToc = false
title = "Fine-tuning LLMs for text generation"
weight = 3
weight = 22
{{% notice note %}}
{{% alert note %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section covers how to fine-tune a language model for text generation and consume it in LocalAI.

@ -2,7 +2,8 @@
disableToc = false
title = "FAQ"
weight = 9
weight = 24
icon = "quiz"
## Frequently asked questions
@ -12,25 +13,13 @@ Here are answers to some of the most common questions.
### How do I get models?
Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face:, or models from gpt4all are compatible too:
### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.
### Everything is slow, how come?
### Everything is slow, how is it possible?
There are a few situations in which this could occur. Some tips are:
- Don't use HDD to store your models. Prefer SSD over HDD. In case you are stuck with HDD, disable `mmap` in the model config file so it loads everything in memory.
@ -38,61 +27,31 @@ There are few situation why this could occur. Some tips are:
- Run LocalAI with `DEBUG=true`. This gives more information, including stats on the token inference speed.
- Check that you are actually getting an output: run a simple curl request with `"stream": true` to see how fast the model is responding.
### Can I use it with a Discord bot, or XXX?
Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This lets you use LocalAI with every application that was designed to work with OpenAI, without changing the application!
### Can this leverage GPUs?
There is partial GPU support, see build instructions above.
There is GPU support, see {{%relref "docs/features/GPU-acceleration" %}}.
### Where is the webUI?
There is the availability of localai-webui and chatbot-ui in the examples section and can be setup as per the instructions. However as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
localai-webui and chatbot-ui are available in the examples section and can be set up as per the instructions. However, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several already on GitHub, and they should be compatible with LocalAI already (as it mimics the OpenAI API).
### Does it work with AutoGPT?
Yes, see the [examples](!
### How can I troubleshoot when something is wrong?
Enable the debug mode by setting `DEBUG=true` in the environment variables. This will give you more information on what's going on.
You can also specify `--debug` in the command line.
### I'm getting 'invalid pitch' error when running with CUDA, what's wrong?
This typically happens when your prompt exceeds the context size. Try to reduce the prompt size, or increase the context size.
### I'm getting a 'SIGILL' error, what's wrong?
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`

@ -1,22 +1,23 @@
disableToc = false
title = "⚡ GPU acceleration"
weight = 2
weight = 9
{{% notice note %}}
{{% alert context="warning" %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section contains instructions on how to use LocalAI with GPU acceleration.
{{% notice note %}}
For acceleration on AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}})
{{% /notice %}}
{{% alert icon="⚡" context="warning" %}}
For acceleration on AMD or Metal HW there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
### CUDA(NVIDIA) acceleration
#### Requirements
Requirement: nvidia-container-toolkit (installation instructions [1]( [2](
To check which CUDA version you need, you can run either `nvidia-smi` or `nvcc --version`.

@ -0,0 +1,7 @@
disableToc = false
title = "Features"
weight = 8
icon = "feature_search"

@ -1,10 +1,12 @@
disableToc = false
title = "🔈 Audio to text"
weight = 2
weight = 16
Audio to text models are models that can generate text from an audio file.

The transcription endpoint allows to convert audio files to text. The endpoint is based on [whisper.cpp](, a C++ library for audio transcription. The endpoint input supports all the audio formats supported by `ffmpeg`.

## Usage
Audio to text models are models that can generate text from an audio file.
The transcription endpoint allows to convert audio files to text. The endpoint is based on [whisper.cpp](, a C++ library for audio transcription. The endpoint input supports all the audio formats supported by `ffmpeg`.
## Usage

@ -2,20 +2,20 @@
disableToc = false
title = "✍️ Constrained grammars"
weight = 6
weight = 15
The chat endpoint accepts an additional `grammar` parameter which takes a [BNF defined grammar](
This allows the LLM to constrain the output to a user-defined schema, allowing to generate `JSON`, `YAML`, and everything that can be defined with a BNF grammar.
{{% notice note %}}
This feature works only with models compatible with the [llama.cpp]( backend (see also [Model compatibility]({{%relref "model-compatibility" %}})). For details on how it works, see the upstream PRs:,
{{% /notice %}}
{{% alert note %}}
This feature works only with models compatible with the [llama.cpp]( backend (see also [Model compatibility]({{%relref "docs/reference/compatibility-table" %}})). For details on how it works, see the upstream PRs:,
{{% /alert %}}
## Setup
Follow the setup instructions from the [LocalAI functions]({{%relref "features/openai-functions" %}}) page.
Follow the setup instructions from the [LocalAI functions]({{%relref "docs/features/openai-functions" %}}) page.
## 💡 Usage example

@ -2,7 +2,7 @@
disableToc = false
title = "🧠 Embeddings"
weight = 2
weight = 13
LocalAI supports generating embeddings for text or list of tokens.
@ -73,7 +73,7 @@ parameters:
The `sentencetransformers` backend uses Python [sentence-transformers]( For a list of all pre-trained models available see here:
{{% notice note %}}
{{% alert note %}}
- The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use.
- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
@ -82,7 +82,7 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
- The `sentencetransformers` backend does support only embeddings of text, and not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`.
- No models are required to be downloaded before using the `sentencetransformers` backend. The models will be downloaded automatically the first time the API is used.
{{% /notice %}}
{{% /alert %}}
## Llama.cpp embeddings

@ -2,12 +2,12 @@
disableToc = false
title = "🆕 GPT Vision"
weight = 2
weight = 14
{{% notice note %}}
{{% alert note %}}
Available only on `master` builds
{{% /notice %}}
{{% /alert %}}
LocalAI supports understanding images by using [LLaVA](, and implements the [GPT Vision API]( from OpenAI.

@ -2,13 +2,13 @@
disableToc = false
title = "🎨 Image generation"
weight = 2
weight = 12
(Generated with [AnimagineXL](
LocalAI supports generating images with Stable diffusion, running on CPU using a C++ implementation, [Stable-Diffusion-NCNN]( ([binding]( and [🧨 Diffusers]({{%relref "model-compatibility/diffusers" %}}).
LocalAI supports generating images with Stable diffusion, running on CPU using C++ and Python implementations.

## Usage
## Usage
@ -35,7 +35,9 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j
## stablediffusion-cpp
## Backends
### stablediffusion-cpp
| mode=0 | mode=1 (winograd/sgemm) |
@ -45,7 +47,7 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j
Note: image generator supports images up to 512x512. You can use other tools however to upscale the image, for instance:
### Setup
#### Setup
Note: In order to use the `images/generation` endpoint with the `stablediffusion` C++ backend, you need to build LocalAI with `GO_TAGS=stablediffusion`. If you are using the container images, it is already enabled.
@ -128,11 +130,14 @@ models
{{< /tabs >}}
## Diffusers
### Diffusers
This is an extra backend - in the container is already available and there is nothing to do for the setup.
[Diffusers]( is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. LocalAI has a diffusers backend which allows image generation using the `diffusers` library.
### Model setup
(Generated with [AnimagineXL](
#### Model setup
The models will be downloaded the first time you use the backend from `huggingface` automatically.
@ -150,3 +155,198 @@ diffusers:
cuda: false # Enable for GPU usage (CUDA)
scheduler_type: euler_a
#### Dependencies
This is an extra backend - it is already available in the container and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "docs/getting-started/build" %}}).
#### Model setup
Create a model configuration file in the `models` directory, for instance to use `Linaqruf/animagine-xl` with CPU:
name: animagine-xl
model: Linaqruf/animagine-xl
backend: diffusers
cuda: true
f16: true
scheduler_type: euler_a
#### Local models
You can also use local models, or modify some parameters like `clip_skip`, `scheduler_type`, for instance:
name: stablediffusion
model: toonyou_beta6.safetensors
backend: diffusers
step: 30
f16: true
cuda: true
pipeline_type: StableDiffusionPipeline
enable_parameters: "negative_prompt,num_inference_steps,clip_skip"
scheduler_type: "k_dpmpp_sde"
cfg_scale: 8
clip_skip: 11
#### Configuration parameters
The following parameters are available in the configuration file:
| Parameter | Description | Default |
| --- | --- | --- |
| `f16` | Force the usage of `float16` instead of `float32` | `false` |
| `step` | Number of steps to run the model for | `30` |
| `cuda` | Enable CUDA acceleration | `false` |
| `enable_parameters` | Parameters to enable for the model | `negative_prompt,num_inference_steps,clip_skip` |
| `scheduler_type` | Scheduler type | `k_dpp_sde` |
| `cfg_scale` | Configuration scale | `8` |
| `clip_skip` | Clip skip | None |
| `pipeline_type` | Pipeline type | `AutoPipelineForText2Image` |
Several types of schedulers are available:
| Scheduler | Description |
| --- | --- |
| `ddim` | DDIM |
| `pndm` | PNDM |
| `heun` | Heun |
| `unipc` | UniPC |
| `euler` | Euler |
| `euler_a` | Euler a |
| `lms` | LMS |
| `k_lms` | LMS Karras |
| `dpm_2` | DPM2 |
| `k_dpm_2` | DPM2 Karras |
| `dpm_2_a` | DPM2 a |
| `k_dpm_2_a` | DPM2 a Karras |
| `dpmpp_2m` | DPM++ 2M |
| `k_dpmpp_2m` | DPM++ 2M Karras |
| `dpmpp_sde` | DPM++ SDE |
| `k_dpmpp_sde` | DPM++ SDE Karras |
| `dpmpp_2m_sde` | DPM++ 2M SDE |
| `k_dpmpp_2m_sde` | DPM++ 2M SDE Karras |
Pipeline types available:
| Pipeline type | Description |
| --- | --- |
| `StableDiffusionPipeline` | Stable diffusion pipeline |
| `StableDiffusionImg2ImgPipeline` | Stable diffusion image to image pipeline |
| `StableDiffusionDepth2ImgPipeline` | Stable diffusion depth to image pipeline |
| `DiffusionPipeline` | Diffusion pipeline |
| `StableDiffusionXLPipeline` | Stable diffusion XL pipeline |
#### Usage
#### Text to Image
Use the `image` generation endpoint with the `model` name from the configuration file:
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "animagine-xl",
"step": 51,
"size": "1024x1024"
#### Image to Image
An example model (GPU):
name: stablediffusion-edit
model: nitrosocke/Ghibli-Diffusion
backend: diffusers
step: 25
cuda: true
f16: true
pipeline_type: StableDiffusionImg2ImgPipeline
enable_parameters: "negative_prompt,num_inference_steps,image"
(echo -n '{"file": "'; base64 $IMAGE_PATH; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-edit"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
#### Depth to Image
name: stablediffusion-depth
model: stabilityai/stable-diffusion-2-depth
backend: diffusers
step: 50
# Force CPU usage
f16: true
cuda: true
pipeline_type: StableDiffusionDepth2ImgPipeline
enable_parameters: "negative_prompt,num_inference_steps,image"
cfg_scale: 6
(echo -n '{"file": "'; base64 ~/path/to/image.jpeg; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-depth"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
#### img2vid
name: img2vid
model: stabilityai/stable-video-diffusion-img2vid
backend: diffusers
step: 25
# Force CPU usage
f16: true
cuda: true
pipeline_type: StableVideoDiffusionPipeline
(echo -n '{"file": "","size": "512x512","model":"img2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
#### txt2vid
name: txt2vid
model: damo-vilab/text-to-video-ms-1.7b
backend: diffusers
step: 25
# Force CPU usage
f16: true
cuda: true
pipeline_type: VideoDiffusionPipeline
cuda: true
(echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations

@ -2,7 +2,9 @@
disableToc = false
title = "🖼️ Model gallery"
weight = 7
weight = 18
url = '/models'
<h1 align="center">
@ -15,13 +17,13 @@ The model gallery is a (experimental!) collection of models configurations for [
To ease the installation of models, LocalAI provides a way to preload models on start and to download and install them at runtime. You can install models manually by copying them over to the `models` directory, or use the API to configure, download and verify the model assets for you. As the UI is still a work in progress, you will find here the documentation about the API Endpoints.
{{% notice note %}}
{{% alert note %}}
The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models.
{{% /notice %}}
{{% /alert %}}
## Useful Links and resources
@ -48,7 +50,7 @@ GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.
where `github:go-skynet/model-gallery/index.yaml` will be expanded automatically to ``.
{{% notice note %}}
{{% alert note %}}
As this feature is experimental, you need to run `local-ai` with a list of `GALLERIES`. Currently there are two galleries:
@ -63,19 +65,19 @@ GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.
If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
You might not find all the models in this gallery. Automated CI updates the gallery automatically. You can, however, find most of the models on huggingface (https://huggingface.co/); generally, a model should be available `~24h` after upload.
Under no circumstances are LocalAI or its developers responsible for the models in this gallery, as CI is just indexing them and providing a convenient way to install them with an automatic configuration and a consistent API. Don't install models from authors you don't trust, and check the appropriate license for your use case. Models are automatically indexed and hosted on huggingface (https://huggingface.co/). For any issue with the models, please open an issue on the model gallery repository if it's a LocalAI misconfiguration; otherwise refer to the huggingface repository. If you think a model should not be listed, please reach out to us and we will remove it from the gallery.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
There is no documentation yet on how to build a gallery or a repository - but you can find an example in the [model-gallery]( repository.
{{% /notice %}}
{{% /alert %}}
### List Models
@ -117,7 +119,7 @@ where:
- `bert-embeddings` is the model name in the gallery
(read its [config here](
{{% notice note %}}
{{% alert note %}}
If the `huggingface` model gallery is enabled (it's enabled by default),
and the model has an entry in the model gallery's associated YAML config
(for `huggingface`, see [`model-gallery/huggingface.yaml`](,
@ -132,7 +134,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
Note that the `id` can be used similarly when pre-loading models at start.
{{% /notice %}}
{{% /alert %}}
## How to install a model (without a gallery)
@ -217,7 +219,7 @@ YAML:
{{% notice note %}}
{{% alert note %}}
You can find already some open licensed models in the [model gallery](
@ -241,7 +243,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
{{% /notice %}}
{{% /alert %}}
## Installing a model with a different name

@ -2,7 +2,7 @@
disableToc = false
title = "🔥 OpenAI functions"
weight = 2
weight = 17
LocalAI supports running OpenAI functions with `llama.cpp` compatible models.
@ -67,13 +67,13 @@ response = openai.ChatCompletion.create(
# ...
{{% notice note %}}
{{% alert note %}}
When running the python script, be sure to:
- Set `OPENAI_API_KEY` environment variable to a random string (the OpenAI api key is NOT required!)
- Set `OPENAI_API_BASE` to point to your LocalAI service, for example `OPENAI_API_BASE=http://localhost:8080`
{{% /notice %}}
{{% /alert %}}
## Advanced

@ -0,0 +1,263 @@
disableToc = false
title = "📖 Text generation (GPT)"
weight = 10
LocalAI supports generating text with GPT with `llama.cpp` and other backends (such as `rwkv.cpp`). See also the [Model compatibility]({{%relref "docs/reference/compatibility-table" %}}) for an up-to-date list of the supported model families.
- You can also specify the model name as part of the OpenAI token.
- If only one model is available, the API will use it for all the requests.
## API Reference
### Chat completions
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### Edit completions
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"instruction": "rephrase",
"input": "Black cat jumped out of the window",
"temperature": 0.7
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
### Completions
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### List models
You can list all the models available with:
curl http://localhost:8080/v1/models
## Backends
### AutoGPTQ
[AutoGPTQ]( is an easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm.
#### Prerequisites
This is an extra backend - it is already available in the container images and there is nothing to do for the setup.
If you are building LocalAI locally, you need to install [AutoGPTQ manually](
#### Model setup
The models are automatically downloaded from `huggingface` if not present the first time. It is possible to define models via `YAML` config file, or just by querying the endpoint with the `huggingface` repository model name. For example, create a `YAML` config file in `models/`:
name: orca
backend: autogptq
model_base_name: ""
model: "TheBloke/orca_mini_v2_13b-GPTQ"
# ...
Test with:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "orca",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
### RWKV
A full example on how to run a rwkv model is in the [examples](
Note: rwkv models need to specify the backend `rwkv` in the YAML config files and must be accompanied by an associated tokenizer file:
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
### llama.cpp
[llama.cpp]( is a popular port of Facebook's LLaMA model in C/C++.
{{% alert note %}}
The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, use the `llama-ggml` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The go backend is deprecated as well but still available as `go-llama`. The go backend still supports features not available in the mainline: speculative sampling and embeddings.
{{% /alert %}}
#### Features
The `llama.cpp` model supports the following features:
- [📖 Text generation (GPT)]({{%relref "docs/features/text-generation" %}})
- [🧠 Embeddings]({{%relref "docs/features/embeddings" %}})
- [🔥 OpenAI functions]({{%relref "docs/features/openai-functions" %}})
- [✍️ Constrained grammars]({{%relref "docs/features/constrained_grammars" %}})
#### Setup
LocalAI supports `llama.cpp` models out of the box. You can use the `llama.cpp` model in the same way as any other model.
##### Manual setup
It is sufficient to copy the `ggml` or `gguf` model files in the `models` folder. You can refer to the model in the `model` parameter in the API calls.
[You can optionally create an associated YAML]({{%relref "docs/advanced" %}}) model config file to tune the model's parameters or apply a template to the prompt.
Prompt templates are useful for models that are fine-tuned towards a specific prompt.
##### Automatic setup
LocalAI supports model galleries which are indexes of models. For instance, the huggingface gallery contains a large curated index of models from the huggingface model hub for `ggml` or `gguf` models.
For instance, if you have the galleries enabled and LocalAI already running, you can just start chatting with models in huggingface by running:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "TheBloke/WizardLM-13B-V1.2-GGML/wizardlm-13b-v1.2.ggmlv3.q2_K.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.1
LocalAI will automatically download and configure the model in the `model` directory.
Models can be also preloaded or downloaded on demand. To learn about model galleries, check out the [model gallery documentation]({{%relref "docs/features/model-gallery" %}}).
#### YAML configuration
To use the `llama.cpp` backend, specify `llama` as the backend in the YAML file:
name: llama
backend: llama
# Relative to the models path
model: file.gguf.bin
In the example above we specify `llama` as the backend to restrict loading `gguf` models only.
For instance, to use the `llama-ggml` backend for `ggml` models:
name: llama
backend: llama-ggml
# Relative to the models path
model: file.ggml.bin
#### Reference
- [llama](
- [binding](
### exllama/2
[Exllama](https://github.com/turboderp/exllama) is "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". Both `exllama` and `exllama2` are supported.
#### Model setup
Download the model as a folder inside the `models` directory and create a YAML file specifying the `exllama` backend. For instance, with the `TheBloke/WizardLM-7B-uncensored-GPTQ` model:
$ git lfs install
$ cd models && git clone
$ ls models/
.keep WizardLM-7B-uncensored-GPTQ/ exllama.yaml
$ cat models/exllama.yaml
name: exllama
model: WizardLM-7B-uncensored-GPTQ
backend: exllama
# Note: you can also specify "exllama2" if it's an exllama2 model here
# ...
Test with:
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "exllama",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
### vLLM
[vLLM]( is a fast and easy-to-use library for LLM inference.
LocalAI has a built-in integration with vLLM, and it can be used to run models. You can check out `vllm` performance [here](
#### Setup
Create a YAML file for the model you want to use with `vllm`.
To setup a model, you need to just specify the model name in the YAML config file:
name: vllm
backend: vllm
model: "facebook/opt-125m"
# Uncomment to specify a quantization method (optional)
# quantization: "awq"
The backend will automatically download the required files in order to run the model.
#### Usage
Use the `completions` endpoint by specifying the `vllm` backend:
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "vllm",
"prompt": "Hello, my name is",
"temperature": 0.1, "top_p": 0.1

@ -0,0 +1,158 @@
disableToc = false
title = "🗣 Text to audio (TTS)"
weight = 11
The `/tts` endpoint can be used to generate speech from text.

## Usage
## Usage
Input: `input`, `model`
For example, to generate an audio file, you can send a POST request to the `/tts` endpoint with the instruction as the request body:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"input": "Hello world",
"model": "tts"
Returns an `audio/wav` file.
## Backends
### 🐸 Coqui
Required: Don't use `LocalAI` images ending with the `-core` tag. Python dependencies are required in order to use this backend.
Coqui works without any configuration, to test it, you can run the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "coqui",
"model": "tts_models/en/ljspeech/glow-tts",
"input":"Hello, this is a test!"
### Bark
[Bark](https://github.com/suno-ai/bark) allows generating audio from text prompts.
This is an extra backend - it is already available in the container and there is nothing to do for the setup.
#### Model setup
There is nothing to be done for the model setup. You can already start to use bark. The models will be downloaded the first time you use the backend.
#### Usage
Use the `tts` endpoint by specifying the `bark` backend:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input": "Hello!"
}' | aplay
To specify a voice from the Bark voice presets, use the `model` parameter:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"model": "v2/en_speaker_4"
}' | aplay
### Piper
To install the `piper` audio models manually:
- Download Voices from
- Extract the `.tar.tgz` files (.onnx,.json) inside `models`
- Run the following command to test the model is working
To use the tts endpoint, run the following command. You can specify a backend with the `backend` parameter. For example, to use the `piper` backend:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "piper",
"input": "Ciao, sono Ettore"
}' | aplay
- `aplay` is a Linux command. You can use other tools to play the audio file.
- The model name is the filename with the extension.
- The model name is case sensitive.
- LocalAI must be compiled with the `GO_TAGS=tts` flag.
### Transformers-musicgen
LocalAI also has experimental support for `transformers-musicgen` for the generation of short musical compositions. Currently, this is implemented via the same requests used for text to speech:
curl --request POST \
--url http://localhost:8080/tts \
--header 'Content-Type: application/json' \
--data '{
"backend": "transformers-musicgen",
"model": "facebook/musicgen-medium",
"input": "Cello Rave"
}' | aplay
Future versions of LocalAI will expose additional control over audio generation beyond the text prompt.
### Vall-E-X
[VALL-E-X]( is an open source implementation of Microsoft's VALL-E X zero-shot TTS model.
#### Setup
The backend will automatically download the required files in order to run the model.
This is an extra backend - it is already available in the container and there is nothing to do for the setup. If you are building manually, you need to install Vall-E-X manually first.
#### Usage
Use the tts endpoint by specifying the vall-e-x backend:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"input": "Hello!"
}' | aplay
#### Voice cloning
In order to use voice cloning capabilities you must create a `YAML` configuration file to setup a model:
name: cloned-voice
backend: vall-e-x
model: "cloned-voice"
# The path to the audio file to be cloned
# relative to the models directory
audio_path: "path-to-wav-source.wav"