2023-05-20 17:03:53 +02:00
## Set number of threads.
## Note: prefer the number of physical cores. Oversubscribing the CPU degrades performance notably.
2024-04-11 02:19:24 -05:00
# LOCALAI_THREADS=14
2023-05-20 17:03:53 +02:00
## Specify a different bind address (defaults to ":8080")
2024-04-11 02:19:24 -05:00
# LOCALAI_ADDRESS=127.0.0.1:8080
2023-05-20 17:03:53 +02:00
## Default models context size
2024-04-11 02:19:24 -05:00
# LOCALAI_CONTEXT_SIZE=512
2023-06-28 18:28:53 +02:00
#
## Define galleries.
## Models available to install will be visible in `/models/available`
2024-04-28 23:56:10 +02:00
# LOCALAI_GALLERIES=[{"name":"localai", "url":"github:mudler/LocalAI/gallery/index.yaml@master"}]
2023-06-28 18:28:53 +02:00
## CORS settings
2024-04-11 02:19:24 -05:00
# LOCALAI_CORS=true
# LOCALAI_CORS_ALLOW_ORIGINS=*
2023-05-20 17:03:53 +02:00
## Default path for models
2023-06-28 18:28:53 +02:00
#
2024-04-11 02:19:24 -05:00
# LOCALAI_MODELS_PATH=/models
2023-05-20 17:03:53 +02:00
## Enable debug mode
2024-04-11 02:19:24 -05:00
# LOCALAI_LOG_LEVEL=debug
2023-05-20 17:03:53 +02:00
2023-09-08 18:38:22 +02:00
## Disables COMPEL (Diffusers)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
2024-04-11 02:19:24 -05:00
# LOCALAI_SINGLE_ACTIVE_BACKEND=true
2023-09-08 18:38:22 +02:00
2023-06-28 18:28:53 +02:00
## Specify a build type. Available: cublas, openblas, clblas.
2023-08-07 04:22:42 -04:00
## cuBLAS: This is a GPU-accelerated version of the complete standard BLAS (Basic Linear Algebra Subprograms) library. It's provided by Nvidia and is part of their CUDA toolkit.
## OpenBLAS: This is an open-source implementation of the BLAS library that aims to provide highly optimized code for various platforms. It includes support for multi-threading and can be compiled to use hardware-specific features for additional performance. OpenBLAS can run on many kinds of hardware, including CPUs from Intel, AMD, and ARM.
## clBLAS: This is an open-source implementation of the BLAS library that uses OpenCL, a framework for writing programs that execute across heterogeneous platforms consisting of CPUs, GPUs, and other processors. clBLAS is designed to take advantage of the parallel computing power of GPUs but can also run on any hardware that supports OpenCL. This includes hardware from different vendors like Nvidia, AMD, and Intel.
2023-05-20 17:03:53 +02:00
# BUILD_TYPE=openblas
2023-07-07 00:29:10 +02:00
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
2023-05-20 17:03:53 +02:00
2023-06-28 18:28:53 +02:00
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
2023-05-20 17:03:53 +02:00
# GO_TAGS=stablediffusion
## Path where generated images are stored
2024-04-11 02:19:24 -05:00
# LOCALAI_IMAGE_PATH=/tmp/generated/images
2023-05-20 17:03:53 +02:00
## Specify a default upload limit in MB (whisper)
2024-04-11 02:19:24 -05:00
# LOCALAI_UPLOAD_LIMIT=15
2023-09-08 18:38:22 +02:00
## List of external GRPC backends (note: on the container image, this variable is already set to use the extra backends available in extra/)
2024-04-11 02:19:24 -05:00
# LOCALAI_EXTERNAL_GRPC_BACKENDS=my-backend:127.0.0.1:9000,my-backend2:/usr/bin/backend.py
2023-09-08 18:38:22 +02:00
### Advanced settings ###
### These are not used by LocalAI itself, but by other components in the stack ###
##
### Preload libraries
# LD_PRELOAD=
### Huggingface cache for models
2023-09-19 21:30:39 +02:00
# HUGGINGFACE_HUB_CACHE=/usr/local/huggingface
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls whether a backend can process multiple requests or not.
2023-11-11 13:14:59 +01:00
# PYTHON_GRPC_MAX_WORKERS=1
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
2023-11-16 08:20:05 +01:00
# LLAMACPP_PARALLEL=1
2024-05-15 01:17:02 +02:00
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
2023-11-16 08:20:05 +01:00
### Enable to run parallel requests
2024-04-11 02:19:24 -05:00
# LOCALAI_PARALLEL_REQUESTS=true
2023-11-26 18:36:23 +01:00
2024-08-14 03:06:41 -04:00
# Enable to allow p2p mode
# LOCALAI_P2P=true
2024-12-20 00:19:31 +10:00
# Enable to use federated mode
# LOCALAI_FEDERATED=true
# Enable to start federation server
# FEDERATED_SERVER=true
# Define to use federation token
# TOKEN=""
2023-11-26 18:36:23 +01:00
### Watchdog settings
###
# Enables watchdog to kill backends that are inactive for too long
2024-04-11 02:19:24 -05:00
# LOCALAI_WATCHDOG_IDLE=true
2023-11-26 18:36:23 +01:00
#
# Time in duration format (e.g. 1h30m) after which a backend is considered idle
2024-04-11 02:19:24 -05:00
# LOCALAI_WATCHDOG_IDLE_TIMEOUT=5m
#
# Enables watchdog to kill backends that are busy for too long
# LOCALAI_WATCHDOG_BUSY=true
2023-11-26 18:36:23 +01:00
#
# Time in duration format (e.g. 1h30m) after which a backend is considered busy
2024-04-28 23:56:10 +02:00
# LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m