From f120a0c9f90fe94db084121c3f46589adebb0483 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:09:49 +0200
Subject: [PATCH 1/3] docs(swagger): enhance coverage of APIs (#2753)

Signed-off-by: Ettore Di Giacinto
---
 core/http/endpoints/openai/files.go | 26 ++++++++++++++++++--------
 core/http/endpoints/openai/list.go  |  9 +++++----
 core/schema/openai.go               |  5 +++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go
index 23a6eba6..d7741580 100644
--- a/core/http/endpoints/openai/files.go
+++ b/core/http/endpoints/openai/files.go
@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
 	return nil, fmt.Errorf("unable to find file id %s", id)
 }
 
-// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
+// GetFilesEndpoint is the OpenAI API endpoint to retrieve a file https://platform.openai.com/docs/api-reference/files/retrieve
+// @Summary Returns information about a specific file.
+// @Success 200 {object} File "Response"
+// @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -135,13 +138,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
 	}
 }
 
-// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
+type DeleteStatus struct {
+	Id      string
+	Object  string
+	Deleted bool
+}
+
+// DeleteFilesEndpoint is the OpenAI API endpoint to delete a file https://platform.openai.com/docs/api-reference/files/delete
+// @Summary Delete a file.
+// @Success 200 {object} DeleteStatus "Response"
+// @Router /v1/files/{file_id} [delete]
 func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type DeleteStatus struct {
-		Id      string
-		Object  string
-		Deleted bool
-	}
 
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -174,7 +181,10 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 	}
 }
 
-// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// GetFilesContentsEndpoint is the OpenAI API endpoint to retrieve the contents of a file https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// @Summary Returns the contents of the specified file.
+// @Success 200 {string} binary "file"
+// @Router /v1/files/{file_id}/content [get]
 func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go
index 3253c38a..ba6bd1d7 100644
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -6,6 +6,10 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 )
 
+// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
+// @Summary List and describe the various models available in the API.
+// @Success 200 {object} schema.ModelsDataResponse "Response"
+// @Router /v1/models [get]
 func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 		if err != nil {
 			return err
 		}
-		return c.JSON(struct {
-			Object string               `json:"object"`
-			Data   []schema.OpenAIModel `json:"data"`
-		}{
+		return c.JSON(schema.ModelsDataResponse{
 			Object: "list",
 			Data:   dataModels,
 		})
diff --git a/core/schema/openai.go b/core/schema/openai.go
index e95b7d8f..9735bb32 100644
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -155,3 +155,8 @@ type OpenAIRequest struct {
 	// AutoGPTQ
 	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
 }
+
+type ModelsDataResponse struct {
+	Object string        `json:"object"`
+	Data   []OpenAIModel `json:"data"`
+}

From d5a56f04be348ea94c12caa1a8ad28555be0e95c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:10:02 +0200
Subject: [PATCH 2/3] feat(p2p): allow to disable DHT and use only LAN (#2751)

This allows LocalAI to be less noisy by avoiding connections to the
outside. Needed if, e.g., there is no plan to use p2p across separate
networks.

Signed-off-by: Ettore Di Giacinto
---
 core/p2p/p2p.go                                        |  8 ++++++--
 docs/content/docs/features/distributed_inferencing.md  | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go
index 79e8051e..e0e46170 100644
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@@ -348,11 +348,15 @@ func newNodeOpts(token string) ([]node.Option, error) {
 	llger := logger.New(log.LevelFatal)
 
 	defaultInterval := 10 * time.Second
+	// TODO: move this up, expose more config options when creating a node
+	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
+	noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
+
 	loglevel := "info"
 
 	c := config.Config{
 		Limit: config.ResourceLimit{
-			Enable:   true,
+			Enable:   !noLimits,
 			MaxConns: 100,
 		},
 		NetworkToken: token,
@@ -372,7 +376,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
 			RateLimitInterval: defaultInterval,
 		},
 		Discovery: config.Discovery{
-			DHT:      true,
+			DHT:      !noDHT,
 			MDNS:     true,
 			Interval: 30 * time.Second,
 		},
diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 80a9a7cf..abe34373 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -98,3 +98,27 @@ The server logs should indicate that new workers are being discovered.
 - If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
 - Only a single model is supported currently.
 - Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+
+
+## Environment Variables
+
+There are options and parameters that can be tweaked using the following environment variables:
+
+| Environment Variable | Description |
+|----------------------|-------------|
+| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and keep the p2p layer local-only (mDNS discovery) |
+| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resource management |
+| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
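+
+For example, to keep the p2p layer LAN-only (mDNS discovery, no DHT), the
+variable can be set when starting both the server and the workers. A minimal
+sketch, assuming the worker setup described earlier in this page (adjust the
+subcommands and token handling to your installation):
+
+```bash
+# Server: discoverable only on the local network
+LOCALAI_P2P_DISABLE_DHT=true ./local-ai run --p2p
+
+# Worker on the same LAN, sharing the same token
+LOCALAI_P2P_DISABLE_DHT=true TOKEN=<token> ./local-ai worker p2p-llama-cpp-rpc
+```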
From 2845baecd56c8caba4022d01d2d5432110a055e9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:13:29 +0200
Subject: [PATCH 3/3] fix(cuda): downgrade default version from 12.5 to 12.4 (#2707)

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image-pr.yml    |  4 ++--
 .github/workflows/image.yml       | 16 ++++++++--------
 .github/workflows/image_build.yml |  2 +-
 .github/workflows/release.yaml    |  2 +-
 Dockerfile                        |  2 +-
 Makefile                          |  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 2452ef04..3e4d8e4d 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -46,7 +46,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 0a30e46f..73899e15 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -64,7 +64,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 53015565..c891d3dd 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "5"
+        default: "4"
         type: string
       platforms:
         description: 'Platforms'
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index efd33f83..86d60921 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-5
+          CUDA_VERSION: 12-4
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
diff --git a/Dockerfile b/Dockerfile
index ac42db5d..907ad54b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=5
+ARG CUDA_MINOR_VERSION=4
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
diff --git a/Makefile b/Makefile
index e2ae5c49..d4ca1e4f 100644
--- a/Makefile
+++ b/Makefile
@@ -425,7 +425,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
	ls -liah $(abspath ./tests/e2e-fixtures)
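
Note: with the default pinned back to 12.4, the CUDA version used for a local
container build can still be overridden via the build arguments defined in the
Dockerfile above. A minimal sketch (the image tag is illustrative):

    docker build \
      --build-arg BUILD_TYPE=cublas \
      --build-arg CUDA_MAJOR_VERSION=12 \
      --build-arg CUDA_MINOR_VERSION=4 \
      -t localai:cuda-12.4 .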