Merge branch 'master' into fix_aarch64

Commit 9080e6442d
.github/workflows/image-pr.yml (vendored, 4 changes)

@@ -46,7 +46,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/image.yml (vendored, 16 changes)

@@ -64,7 +64,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
-          cuda-minor-version: "8"
+          cuda-minor-version: "7"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
-          cuda-minor-version: "8"
+          cuda-minor-version: "7"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
           makeflags: "--jobs=3 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
-          cuda-minor-version: "8"
+          cuda-minor-version: "7"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "11"
-          cuda-minor-version: "8"
+          cuda-minor-version: "7"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
           makeflags: "--jobs=4 --output-sync=target"
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "5"
+          cuda-minor-version: "4"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-cublas-cuda12-ffmpeg-core'
.github/workflows/image_build.yml (vendored, 2 changes)

@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "5"
+        default: "4"
         type: string
       platforms:
         description: 'Platforms'
.github/workflows/release.yaml (vendored, 2 changes)

@@ -40,7 +40,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-5
+          CUDA_VERSION: 12-4
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
Dockerfile

@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=5
+ARG CUDA_MINOR_VERSION=4
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
Makefile (2 changes)

@@ -425,7 +425,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
 	return nil, fmt.Errorf("unable to find file id %s", id)
 }
 
-// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
+// GetFilesEndpoint is the OpenAI API endpoint to get files https://platform.openai.com/docs/api-reference/files/retrieve
+// @Summary Returns information about a specific file.
+// @Success 200 {object} File "Response"
+// @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -135,13 +138,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
 	}
 }
 
-// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
+type DeleteStatus struct {
+	Id      string
+	Object  string
+	Deleted bool
+}
+
+// DeleteFilesEndpoint is the OpenAI API endpoint to delete files https://platform.openai.com/docs/api-reference/files/delete
+// @Summary Delete a file.
+// @Success 200 {object} DeleteStatus "Response"
+// @Router /v1/files/{file_id} [delete]
 func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type DeleteStatus struct {
-		Id      string
-		Object  string
-		Deleted bool
-	}
-
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -174,7 +181,11 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 	}
 }
 
-// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// GetFilesContentsEndpoint is the OpenAI API endpoint to get files content https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// @Summary Returns information about a specific file.
+// @Success 200 {string} binary "file"
+// @Router /v1/files/{file_id}/content [get]
+// GetFilesContentsEndpoint
 func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
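The hunks above also promote DeleteStatus from a function-local type to package scope, which lets the new @Success annotation reference it and makes the delete response shape part of the documented API. A minimal client-side sketch of that response, assuming a LocalAI server listening on localhost:8080 and a purely hypothetical file ID:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// DeleteStatus mirrors the struct promoted to package scope in the diff above.
type DeleteStatus struct {
	Id      string
	Object  string
	Deleted bool
}

func main() {
	// Hypothetical server address and file ID, for illustration only.
	req, err := http.NewRequest(http.MethodDelete, "http://localhost:8080/v1/files/file-abc123", nil)
	if err != nil {
		panic(err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The struct has no json tags, so its fields marshal as "Id", "Object", "Deleted".
	var status DeleteStatus
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		panic(err)
	}
	fmt.Printf("id=%s object=%s deleted=%v\n", status.Id, status.Object, status.Deleted)
}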
@@ -6,6 +6,10 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 )
 
+// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
+// @Summary List and describe the various models available in the API.
+// @Success 200 {object} schema.ModelsDataResponse "Response"
+// @Router /v1/models [get]
 func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 		if err != nil {
 			return err
 		}
-		return c.JSON(struct {
-			Object string               `json:"object"`
-			Data   []schema.OpenAIModel `json:"data"`
-		}{
+		return c.JSON(schema.ModelsDataResponse{
 			Object: "list",
 			Data:   dataModels,
 		})
@@ -348,11 +348,15 @@ func newNodeOpts(token string) ([]node.Option, error) {
 	llger := logger.New(log.LevelFatal)
 	defaultInterval := 10 * time.Second
 
+	// TODO: move this up, expose more config options when creating a node
+	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
+	noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
+
 	loglevel := "info"
 
 	c := config.Config{
 		Limit: config.ResourceLimit{
-			Enable:   true,
+			Enable:   !noLimits,
 			MaxConns: 100,
 		},
 		NetworkToken: token,
@@ -372,7 +376,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
 			RateLimitInterval: defaultInterval,
 		},
 		Discovery: config.Discovery{
-			DHT:      true,
+			DHT:      noDHT,
 			MDNS:     true,
 			Interval: 30 * time.Second,
 		},
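Both p2p hunks follow the same convention: an opt-out flag read once from the environment, where an unset variable preserves the previous hard-coded default. A standalone sketch of that convention, reusing the variable names from the diff but none of the actual LocalAI node wiring:

package main

import (
	"fmt"
	"os"
)

// envFlag reports whether an opt-out variable is explicitly set to "true".
// Any other value, including unset, keeps the previous default behaviour.
func envFlag(name string) bool {
	return os.Getenv(name) == "true"
}

func main() {
	noDHT := envFlag("LOCALAI_P2P_DISABLE_DHT")
	noLimits := envFlag("LOCALAI_P2P_DISABLE_LIMITS")

	// With both variables unset, resource limits stay enabled,
	// matching the old hard-coded Enable: true.
	fmt.Printf("DHT disabled: %v, resource limits enabled: %v\n", noDHT, !noLimits)
}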
@@ -155,3 +155,8 @@ type OpenAIRequest struct {
 	// AutoGPTQ
 	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
 }
+
+type ModelsDataResponse struct {
+	Object string        `json:"object"`
+	Data   []OpenAIModel `json:"data"`
+}
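ModelsDataResponse replaces the anonymous struct previously built inline in ListModelsEndpoint, giving the models listing a named, swagger-referenceable wire format. A small sketch of the JSON it produces, with local copies of the schema types trimmed to the fields visible in this diff and a hypothetical model entry:

package main

import (
	"encoding/json"
	"fmt"
)

// Local copies of the schema types, trimmed to the fields shown in the diff.
type OpenAIModel struct {
	ID     string `json:"id"`
	Object string `json:"object"`
}

type ModelsDataResponse struct {
	Object string        `json:"object"`
	Data   []OpenAIModel `json:"data"`
}

func main() {
	resp := ModelsDataResponse{
		Object: "list",
		Data:   []OpenAIModel{{ID: "example-model", Object: "model"}}, // hypothetical entry
	}
	out, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(out))
	// {
	//   "object": "list",
	//   "data": [
	//     {
	//       "id": "example-model",
	//       "object": "model"
	//     }
	//   ]
	// }
}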
@ -98,3 +98,14 @@ The server logs should indicate that new workers are being discovered.
|
|||||||
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
|
- If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
|
||||||
- Only a single model is supported currently.
|
- Only a single model is supported currently.
|
||||||
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
|
- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
|
||||||
|
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
There are options that can be tweaked or parameters that can be set using environment variables
|
||||||
|
|
||||||
|
| Environment Variable | Description |
|
||||||
|
|----------------------|-------------|
|
||||||
|
| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and enable p2p layer to be local only (mDNS) |
|
||||||
|
| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resources management |
|
||||||
|
| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
|
||||||
|