chore: drop embedded models (#4715)
Some checks are pending
Explorer deployment / build-linux (push) Waiting to run
GPU tests / ubuntu-latest (1.21.x) (push) Waiting to run
generate and publish GRPC docker caches / generate_caches (ubuntu:22.04, linux/amd64,linux/arm64, ubuntu-latest) (push) Waiting to run
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Waiting to run
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-ffmpeg) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Waiting to run
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-ffmpeg-core) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16-ffmpeg) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32-ffmpeg) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11-ffmpeg) (push) Waiting to run
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12-ffmpeg) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-ffmpeg-core) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Waiting to run
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-ffmpeg-core) (push) Waiting to run
build container images / self-hosted-jobs (ubuntu:22.04, , , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Waiting to run
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -ffmpeg) (push) Waiting to run
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 11, 7, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11) (push) Waiting to run
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 12, 0, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12) (push) Waiting to run
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -ffmpeg-core) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-ffmpeg-core) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-ffmpeg-core) (push) Waiting to run
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-vulkan-ffmpeg-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-ffmpeg-core) (push) Waiting to run
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Waiting to run
Security Scan / tests (push) Waiting to run
Tests extras backends / tests-transformers (push) Waiting to run
Tests extras backends / tests-rerankers (push) Waiting to run
Tests extras backends / tests-diffusers (push) Waiting to run
Tests extras backends / tests-coqui (push) Waiting to run
tests / tests-linux (1.21.x) (push) Waiting to run
tests / tests-aio-container (push) Waiting to run
tests / tests-apple (1.21.x) (push) Waiting to run

Since the remote gallery was introduced this is now completely
superseded by it. In order to keep the code clean and remove redudant
parts let's simplify the usage.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2025-01-30 00:03:01 +01:00 committed by GitHub
parent 1656e1a88e
commit 72e52c4f6a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
39 changed files with 8 additions and 986 deletions

View File

@ -861,7 +861,7 @@ swagger:
.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets
$(GOCMD) run core/dependencies_manager/manager.go webui_static.yaml core/http/static/assets
## Documentation
docs/layouts/_default:

View File

@ -62,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
if err := pkgStartup.InstallModels(options.Galleries, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}

View File

@ -100,7 +100,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
}
err = startup.InstallModels(galleries, "", mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
err = startup.InstallModels(galleries, mi.ModelsPath, !mi.DisablePredownloadScan, progressCallback, modelName)
if err != nil {
return err
}

View File

@ -32,7 +32,6 @@ type RunCMD struct {
Galleries string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
AutoloadGalleries bool `env:"LOCALAI_AUTOLOAD_GALLERIES,AUTOLOAD_GALLERIES" group:"models"`
RemoteLibrary string `env:"LOCALAI_REMOTE_LIBRARY,REMOTE_LIBRARY" default:"${remoteLibraryURL}" help:"A LocalAI remote library URL" group:"models"`
PreloadModels string `env:"LOCALAI_PRELOAD_MODELS,PRELOAD_MODELS" help:"A List of models to apply in JSON at start" group:"models"`
Models []string `env:"LOCALAI_MODELS,MODELS" help:"A List of model configuration URLs to load" group:"models"`
PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`
@ -90,7 +89,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithDynamicConfigDirPollInterval(r.LocalaiConfigDirPollInterval),
config.WithF16(r.F16),
config.WithStringGalleries(r.Galleries),
config.WithModelLibraryURL(r.RemoteLibrary),
config.WithCors(r.CORS),
config.WithCorsAllowOrigins(r.CORSAllowOrigins),
config.WithCsrf(r.CSRF),

View File

@ -44,8 +44,6 @@ type ApplicationConfig struct {
DisableGalleryEndpoint bool
LoadToMemory []string
ModelLibraryURL string
Galleries []Gallery
BackendAssets embed.FS
@ -126,12 +124,6 @@ func WithP2PToken(s string) AppOption {
}
}
func WithModelLibraryURL(url string) AppOption {
return func(o *ApplicationConfig) {
o.ModelLibraryURL = url
}
}
func WithLibPath(path string) AppOption {
return func(o *ApplicationConfig) {
o.LibPath = path

View File

@ -129,7 +129,7 @@ func (g *GalleryService) Start(c context.Context, cl *config.BackendConfigLoader
if op.GalleryModelName != "" {
err = gallery.InstallModelFromGallery(op.Galleries, op.GalleryModelName, g.appConfig.ModelPath, op.Req, progressCallback, g.appConfig.EnforcePredownloadScans)
} else if op.ConfigURL != "" {
err = startup.InstallModels(op.Galleries, op.ConfigURL, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
err = startup.InstallModels(op.Galleries, g.appConfig.ModelPath, g.appConfig.EnforcePredownloadScans, progressCallback, op.ConfigURL)
if err != nil {
updateError(err)
continue

View File

@ -1,126 +0,0 @@
+++
disableToc = false
title = "Run other Models"
weight = 23
icon = "rocket_launch"
+++
## Running other models
> _Do you have already a model file? Skip to [Run models manually]({{%relref "docs/getting-started/models" %}})_.
To load models into LocalAI, you can either [use models manually]({{%relref "docs/getting-started/models" %}}) or configure LocalAI to pull the models from external sources, like Huggingface and configure the model.
To do that, you can point LocalAI to an URL to a YAML configuration file - however - LocalAI does also have some popular model configuration embedded in the binary as well. Below you can find a list of the models configuration that LocalAI has pre-built, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}) on how to configure models from URLs.
There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
{{% alert icon="💡" %}}
To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI-examples/tree/main/configurations) and the configurations for the models below is available [here](https://github.com/mudler/LocalAI/tree/master/embedded/models).
{{% /alert %}}
{{< tabs tabTotal="3" >}}
{{% tab tabName="CPU-only" %}}
> 💡Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | GPU-only |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) (with transformers) | [LLM]({{%relref "docs/features/text-generation" %}}) | GPU-only |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) (with llama.cpp) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core hermes-2-pro-mistral``` |
{{% /tab %}}
{{% tab tabName="GPU (CUDA 11)" %}}
> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda11-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 mamba-chat``` |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda11 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core hermes-2-pro-mistral``` |
{{% /tab %}}
{{% tab tabName="GPU (CUDA 12)" %}}
> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version` see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
| 🌋 [bakllava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bakllava``` |
| 🌋 [llava-1.5](https://llava-vl.github.io/) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.5``` |
| 🌋 [llava-1.6-mistral](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-mistral``` |
| 🌋 [llava-1.6-vicuna](https://huggingface.co/cmp-nct/llava-1.6-gguf) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-cublas-cuda12-core llava-1.6-vicuna``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
| 🐸 [coqui](https://github.com/coqui-ai/TTS) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
| 🐶 [bark](https://github.com/suno-ai/bark) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
| 🔊 [vall-e-x](https://github.com/Plachtaa/VALL-E-X) | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
| 🐍 [mamba](https://github.com/state-spaces/mamba) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 mamba-chat``` |
| animagine-xl | [Text to Image]({{%relref "docs/features/image-generation" %}}) | ```docker run -ti -p 8080:8080 -e COMPEL=0 --gpus all localai/localai:{{< version >}}-cublas-cuda12 animagine-xl``` |
| transformers-tinyllama | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 transformers-tinyllama``` |
| [codellama-7b](https://huggingface.co/codellama/CodeLlama-7b-hf) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 codellama-7b``` |
| [codellama-7b-gguf](https://huggingface.co/TheBloke/CodeLlama-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core codellama-7b-gguf``` |
| [hermes-2-pro-mistral](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core hermes-2-pro-mistral``` |
{{% /tab %}}
{{< /tabs >}}
{{% alert icon="💡" %}}
**Tip** You can actually specify multiple models to start an instance with the models loaded, for example to have both llava and phi-2 configured:
```bash
docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
```
{{% /alert %}}

View File

@ -143,7 +143,7 @@ The AIO Images are inheriting the same environment variables as the base images
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/advanced/run-other-models" %}})) |
| `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/models" %}})) |
## Standard container images

View File

@ -1,72 +0,0 @@
package embedded
import (
"embed"
"fmt"
"slices"
"strings"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/pkg/assets"
"gopkg.in/yaml.v3"
)
var modelShorteners map[string]string
//go:embed model_library.yaml
var modelLibrary []byte
//go:embed models/*
var embeddedModels embed.FS
func ModelShortURL(s string) string {
if _, ok := modelShorteners[s]; ok {
s = modelShorteners[s]
}
return s
}
func init() {
err := yaml.Unmarshal(modelLibrary, &modelShorteners)
if err != nil {
log.Error().Err(err).Msg("error while unmarshalling embedded modelLibrary")
}
}
func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
remoteLibrary := map[string]string{}
uri := downloader.URI(url)
err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error {
return yaml.Unmarshal(i, &remoteLibrary)
})
if err != nil {
return nil, fmt.Errorf("error downloading remote library: %s", err.Error())
}
return remoteLibrary, err
}
// ExistsInModelsLibrary checks if a model exists in the embedded models library
func ExistsInModelsLibrary(s string) bool {
f := fmt.Sprintf("%s.yaml", s)
a := []string{}
for _, j := range assets.ListFiles(embeddedModels) {
a = append(a, strings.TrimPrefix(j, "models/"))
}
return slices.Contains(a, f)
}
// ResolveContent returns the content in the embedded model library
func ResolveContent(s string) ([]byte, error) {
if ExistsInModelsLibrary(s) {
return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
}
return nil, fmt.Errorf("cannot find model %s", s)
}

View File

@ -1,9 +0,0 @@
###
###
### This file contains the list of models that are available in the library
### The URLs are automatically expanded when local-ai is being called with the key as argument
###
### For models with an entire YAML file to be embededd, put the file inside the `models`
### directory, it will be automatically available with the file name as key (without the .yaml extension)
phi-2: "github://mudler/LocalAI-examples/configurations/phi-2.yaml@main"

View File

@ -1,13 +0,0 @@
name: all-minilm-l6-v2
backend: sentencetransformers
embeddings: true
parameters:
model: all-MiniLM-L6-v2
usage: |
You can test this model with curl like this:
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "all-minilm-l6-v2"
}'

View File

@ -1,17 +0,0 @@
name: animagine-xl
parameters:
model: Linaqruf/animagine-xl
backend: diffusers
f16: true
diffusers:
scheduler_type: euler_a
usage: |
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "animagine-xl",
"step": 51,
"size": "1024x1024"
}'

View File

@ -1,40 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
gpu_layers: 90
mmap: true
name: bakllava
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "bakllava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@ -1,8 +0,0 @@
usage: |
bark works without any configuration, to test it, you can run the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello, this is a test!"
}' | aplay
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models

View File

@ -1,24 +0,0 @@
backend: llama
context_size: 8192
f16: false
gpu_layers: 90
name: cerbero
mmap: false
parameters:
model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf
top_k: 80
temperature: 0.2
top_p: 0.7
template:
completion: "{{.Input}}"
chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] "
roles:
user: "[|Umano|] "
system: "[|Umano|] "
assistant: "[|Assistente|] "
stopwords:
- "[|Umano|]"
trimsuffix:
- "\n"

View File

@ -1,20 +0,0 @@
name: codellama-7b-gguf
backend: transformers
parameters:
model: huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_K_M.gguf
temperature: 0.5
top_k: 40
seed: -1
top_p: 0.95
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
context_size: 4096
f16: true
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "codellama-7b-gguf",
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
}'

View File

@ -1,14 +0,0 @@
name: codellama-7b
backend: transformers
type: AutoModelForCausalLM
parameters:
model: codellama/CodeLlama-7b-hf
temperature: 0.2
top_k: 40
top_p: 0.95
usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "codellama-7b",
"prompt": "import socket\n\ndef ping_exponential_backoff(host: str):"
}'

View File

@ -1,9 +0,0 @@
usage: |
coqui works without any configuration, to test it, you can run the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "coqui",
"model": "tts_models/en/ljspeech/glow-tts",
"input":"Hello, this is a test!"
}'
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models

View File

@ -1,31 +0,0 @@
name: dolphin-mixtral-8x7b
mmap: true
parameters:
model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
temperature: 0.5
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "dolphin-mixtral-8x7b",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,59 +0,0 @@
name: hermes-2-pro-mistral
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
<tool_call>
{{- else if eq .RoleName "tool" }}
<tool_response>
{{- end }}
{{- if .Content}}
{{.Content }}
{{- end }}
{{- if .FunctionCall}}
{{toJson .FunctionCall}}
{{- end }}
{{- if .FunctionCall }}
</tool_call>
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "hermes-2-pro-mistral",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,48 +0,0 @@
name: llama3-8b-instruct
mmap: true
parameters:
model: huggingface://second-state/Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
Function call:
chat: |
<|begin_of_text|>{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama3-8b-instruct",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,33 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
gpu_layers: 90
mmap: true
name: llava-1.5
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf
parameters:
model: llava-v1.5-7b-Q4_K.gguf
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: llava-v1.5-7b-Q4_K.gguf
uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf
- filename: llava-v1.5-7b-mmproj-Q8_0.gguf
uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava-1.5",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@ -1,33 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
gpu_layers: 90
mmap: true
name: llava-1.6-mistral
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: llava-v1.6-7b-mmproj-f16.gguf
parameters:
model: llava-v1.6-mistral-7b.gguf
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: llava-v1.6-mistral-7b.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf
- filename: llava-v1.6-7b-mmproj-f16.gguf
uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava-1.6-mistral",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@ -1,37 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
gpu_layers: 90
mmap: true
name: llava-1.6-vicuna
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: mmproj-vicuna7b-f16.gguf
parameters:
model: vicuna-7b-q5_k.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: vicuna-7b-q5_k.gguf
uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf
- filename: mmproj-vicuna7b-f16.gguf
uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava-1.6-vicuna",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@ -1,40 +0,0 @@
backend: llama-cpp
context_size: 4096
f16: true
gpu_layers: 90
mmap: true
name: llava
roles:
user: "USER:"
assistant: "ASSISTANT:"
system: "SYSTEM:"
mmproj: bakllava-mmproj.gguf
parameters:
model: bakllava.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat: |
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
{{.Input}}
ASSISTANT:
download_files:
- filename: bakllava.gguf
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
- filename: bakllava-mmproj.gguf
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llava",
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'

View File

@ -1,21 +0,0 @@
name: bagel
backend: mamba
parameters:
model: "jondurbin/bagel-dpo-2.8b-v0.2"
systemPrompt: "You are a helpful, unbiased, uncensored assistant."
template:
chat_message: |
{{if eq .RoleName "assistant"}}{{.Content}}{{else}}
[INST]
{{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<<SYS>>{{.Content}}<</SYS>>
{{else if .Content}}{{.Content}}{{end}}
[/INST]
{{end}}
completion: |
{{.Input}}
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "bagel",
"messages": [{"role": "user", "content": "how are you doing"}],
}'

View File

@ -1,28 +0,0 @@
name: mamba-chat
backend: mamba
parameters:
model: "havenhq/mamba-chat"
trimsuffix:
- <|endoftext|>
# https://huggingface.co/HuggingFaceH4/zephyr-7b-beta/blob/main/tokenizer_config.json
# "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
template:
chat_message: |
{{if eq .RoleName "assistant"}}<|assistant|>{{else if eq .RoleName "system"}}<|system|>{{else if eq .RoleName "user"}}<|user|>{{end}}
{{if .Content}}{{.Content}}{{end}}
</s>
chat: |
{{.Input}}
<|assistant|>
completion: |
{{.Input}}
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "mamba-chat",
"messages": [{"role": "user", "content": "how are you doing"}],
"temperature": 0.7
}'

View File

@ -1,32 +0,0 @@
name: mistral-openorca
mmap: true
parameters:
model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
seed: -1
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "mistral-openorca",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,25 +0,0 @@
name: mixtral-instruct
mmap: true
parameters:
model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf
temperature: 0.2
top_k: 40
seed: -1
top_p: 0.95
mirostat: 2
mirostat_eta: 1.0
mirostat_tau: 1.0
template:
chat: &chat |
[INST] {{.Input}} [/INST]
completion: *chat
context_size: 4096
f16: true
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "mixtral-instruct",
"prompt": "How are you doing?"
}'

View File

@ -1,25 +0,0 @@
name: phi-2-chat
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,30 +0,0 @@
name: phi-2-orange
mmap: true
parameters:
model: huggingface://l3utterfly/phi-2-orange-GGUF/phi-2-orange.Q6_K.gguf
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}
<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
description: |
This model is a chatbot that can be used for general conversation.
[Model card](https://huggingface.co/TheBloke/phi-2-orange-GGUF)
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "phi-2-orange",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,13 +0,0 @@
name: voice-en-us-amy-low
download_files:
- filename: voice-en-us-amy-low.tar.gz
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
usage: |
To test if this model works as expected, you can use the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"en-us-amy-low.onnx",
"input": "Hi, this is a test."
}'

View File

@ -1,29 +0,0 @@
name: tinyllama-chat
mmap: true
parameters:
model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf
temperature: 0.2
top_k: 40
seed: -1
top_p: 0.95
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "tinyllama-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -1,31 +0,0 @@
name: tinyllama-chat
backend: transformers
type: AutoModelForCausalLM
parameters:
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
temperature: 0.2
top_k: 40
top_p: 0.95
max_tokens: 4096
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
stopwords:
- <|im_end|>
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "tinyllama-chat",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'

View File

@ -1,8 +0,0 @@
usage: |
Vall-e-x works without any configuration, to test it, you can run the following curl command:
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"input":"Hello, this is a test!"
}' | aplay
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models

View File

@ -1,18 +0,0 @@
name: whisper
backend: whisper
parameters:
model: ggml-whisper-base.bin
usage: |
## example audio file
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
## Send the example audio file to the transcriptions endpoint
curl http://localhost:8080/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@$PWD/gb1.ogg" -F model="whisper"
download_files:
- filename: "ggml-whisper-base.bin"
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"

View File

@ -9,7 +9,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/embedded"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
@ -18,42 +17,17 @@ import (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
func InstallModels(galleries []config.Gallery, modelLibraryURL string, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
func InstallModels(galleries []config.Gallery, modelPath string, enforceScan bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
lib, _ := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath)
for _, url := range models {
// As a best effort, try to resolve the model from the remote library
// if it's not resolved we try with the other method below
if modelLibraryURL != "" {
if lib[url] != "" {
log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url])
url = lib[url]
}
}
url = embedded.ModelShortURL(url)
uri := downloader.URI(url)
switch {
case embedded.ExistsInModelsLibrary(url):
modelYAML, e := embedded.ResolveContent(url)
// If we resolve something, just save it to disk and continue
if e != nil {
log.Error().Err(e).Msg("error resolving model content")
err = errors.Join(err, e)
continue
}
log.Debug().Msgf("[startup] resolved embedded model: %s", url)
md5Name := utils.MD5(url)
modelDefinitionFilePath := filepath.Join(modelPath, md5Name) + ".yaml"
if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); err != nil {
log.Error().Err(e).Str("filepath", modelDefinitionFilePath).Msg("error writing model definition")
err = errors.Join(err, e)
}
case uri.LooksLikeOCI():
log.Debug().Msgf("[startup] resolved OCI model to download: %s", url)

View File

@ -7,7 +7,6 @@ import (
"github.com/mudler/LocalAI/core/config"
. "github.com/mudler/LocalAI/pkg/startup"
"github.com/mudler/LocalAI/pkg/utils"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
@ -16,29 +15,13 @@ import (
var _ = Describe("Preload test", func() {
Context("Preloading from strings", func() {
It("loads from remote url", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
libraryURL := "https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/model_library.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
InstallModels([]config.Gallery{}, libraryURL, tmpdir, true, nil, "phi-2")
resultFile := filepath.Join(tmpdir, fileName)
content, err := os.ReadFile(resultFile)
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).To(ContainSubstring("name: phi-2"))
})
It("loads from embedded full-urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
InstallModels([]config.Gallery{}, tmpdir, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
@ -47,45 +30,13 @@ var _ = Describe("Preload test", func() {
Expect(string(content)).To(ContainSubstring("name: phi-2"))
})
It("loads from embedded short-urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "phi-2"
InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
entry, err := os.ReadDir(tmpdir)
Expect(err).ToNot(HaveOccurred())
Expect(entry).To(HaveLen(1))
resultFile := entry[0].Name()
content, err := os.ReadFile(filepath.Join(tmpdir, resultFile))
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).To(ContainSubstring("name: phi-2"))
})
It("loads from embedded models", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "mistral-openorca"
fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))
InstallModels([]config.Gallery{}, "", tmpdir, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
content, err := os.ReadFile(resultFile)
Expect(err).ToNot(HaveOccurred())
Expect(string(content)).To(ContainSubstring("name: mistral-openorca"))
})
It("downloads from urls", func() {
tmpdir, err := os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
err = InstallModels([]config.Gallery{}, "", tmpdir, false, nil, url)
err = InstallModels([]config.Gallery{}, tmpdir, false, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)