From f120a0c9f90fe94db084121c3f46589adebb0483 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:09:49 +0200
Subject: [PATCH 1/3] docs(swagger): enhance coverage of APIs (#2753)

Signed-off-by: Ettore Di Giacinto
---
 core/http/endpoints/openai/files.go | 26 ++++++++++++++++++--------
 core/http/endpoints/openai/list.go  |  9 +++++----
 core/schema/openai.go               |  5 +++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/core/http/endpoints/openai/files.go b/core/http/endpoints/openai/files.go
index 23a6eba6..d7741580 100644
--- a/core/http/endpoints/openai/files.go
+++ b/core/http/endpoints/openai/files.go
@@ -123,7 +123,10 @@ func getFileFromRequest(c *fiber.Ctx) (*File, error) {
 	return nil, fmt.Errorf("unable to find file id %s", id)
 }
 
-// GetFilesEndpoint https://platform.openai.com/docs/api-reference/files/retrieve
+// GetFilesEndpoint is the OpenAI API endpoint to retrieve a file https://platform.openai.com/docs/api-reference/files/retrieve
+// @Summary Returns information about a specific file.
+// @Success 200 {object} File "Response"
+// @Router /v1/files/{file_id} [get]
 func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -135,13 +138,17 @@ func GetFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Applicat
 	}
 }
 
-// DeleteFilesEndpoint https://platform.openai.com/docs/api-reference/files/delete
+type DeleteStatus struct {
+	Id      string
+	Object  string
+	Deleted bool
+}
+
+// DeleteFilesEndpoint is the OpenAI API endpoint to delete a file https://platform.openai.com/docs/api-reference/files/delete
+// @Summary Delete a file.
+// @Success 200 {object} DeleteStatus "Response"
+// @Router /v1/files/{file_id} [delete]
 func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	type DeleteStatus struct {
-		Id      string
-		Object  string
-		Deleted bool
-	}
 
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
@@ -174,7 +181,10 @@ func DeleteFilesEndpoint(cm *config.BackendConfigLoader, appConfig *config.Appli
 	}
 }
 
-// GetFilesContentsEndpoint https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// GetFilesContentsEndpoint is the OpenAI API endpoint to retrieve the contents of a file https://platform.openai.com/docs/api-reference/files/retrieve-contents
+// @Summary Returns the contents of the specified file.
+// @Success 200 {string} binary "file"
+// @Router /v1/files/{file_id}/content [get]
 func GetFilesContentsEndpoint(cm *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		file, err := getFileFromRequest(c)
diff --git a/core/http/endpoints/openai/list.go b/core/http/endpoints/openai/list.go
index 3253c38a..ba6bd1d7 100644
--- a/core/http/endpoints/openai/list.go
+++ b/core/http/endpoints/openai/list.go
@@ -6,6 +6,10 @@ import (
 	"github.com/mudler/LocalAI/core/services"
 )
 
+// ListModelsEndpoint is the OpenAI Models API endpoint https://platform.openai.com/docs/api-reference/models
+// @Summary List and describe the various models available in the API.
+// @Success 200 {object} schema.ModelsDataResponse "Response"
+// @Router /v1/models [get]
 func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// If blank, no filter is applied.
@@ -18,10 +22,7 @@ func ListModelsEndpoint(lms *services.ListModelsService) func(ctx *fiber.Ctx) er
 		if err != nil {
 			return err
 		}
-		return c.JSON(struct {
-			Object string               `json:"object"`
-			Data   []schema.OpenAIModel `json:"data"`
-		}{
+		return c.JSON(schema.ModelsDataResponse{
 			Object: "list",
 			Data:   dataModels,
 		})
diff --git a/core/schema/openai.go b/core/schema/openai.go
index e95b7d8f..9735bb32 100644
--- a/core/schema/openai.go
+++ b/core/schema/openai.go
@@ -155,3 +155,8 @@ type OpenAIRequest struct {
 	// AutoGPTQ
 	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
 }
+
+type ModelsDataResponse struct {
+	Object string        `json:"object"`
+	Data   []OpenAIModel `json:"data"`
+}

From d5a56f04be348ea94c12caa1a8ad28555be0e95c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:10:02 +0200
Subject: [PATCH 2/3] feat(p2p): allow to disable DHT and use only LAN (#2751)

This allows LocalAI to be less noisy by avoiding connections to the
outside. Needed if, e.g., there is no plan to use p2p across separate
networks.

Signed-off-by: Ettore Di Giacinto
---
 core/p2p/p2p.go                                        |  8 ++++++--
 docs/content/docs/features/distributed_inferencing.md  | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/core/p2p/p2p.go b/core/p2p/p2p.go
index 79e8051e..e0e46170 100644
--- a/core/p2p/p2p.go
+++ b/core/p2p/p2p.go
@@ -348,11 +348,15 @@ func newNodeOpts(token string) ([]node.Option, error) {
 	llger := logger.New(log.LevelFatal)
 
 	defaultInterval := 10 * time.Second
+	// TODO: move this up, expose more config options when creating a node
+	noDHT := os.Getenv("LOCALAI_P2P_DISABLE_DHT") == "true"
+	noLimits := os.Getenv("LOCALAI_P2P_DISABLE_LIMITS") == "true"
+
 	loglevel := "info"
 
 	c := config.Config{
 		Limit: config.ResourceLimit{
-			Enable:   true,
+			Enable:   !noLimits,
 			MaxConns: 100,
 		},
 		NetworkToken: token,
@@ -372,7 +376,7 @@ func newNodeOpts(token string) ([]node.Option, error) {
 			RateLimitInterval: defaultInterval,
 		},
 		Discovery: config.Discovery{
-			DHT:      true,
+			DHT:      !noDHT,
 			MDNS:     true,
 			Interval: 30 * time.Second,
 		},
diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 80a9a7cf..abe34373 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -98,3 +98,27 @@ The server logs should indicate that new workers are being discovered.
 - If running in p2p mode with container images, make sure you start the container with `--net host` or `network_mode: host` in the docker-compose file.
 - Only a single model is supported currently.
 - Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun.
+
+
+## Environment Variables
+
+There are options and parameters that can be tweaked using the following environment variables:
+
+| Environment Variable | Description |
+|----------------------|-------------|
+| **LOCALAI_P2P_DISABLE_DHT** | Set to "true" to disable DHT and keep the p2p layer local-only (mDNS discovery) |
+| **LOCALAI_P2P_DISABLE_LIMITS** | Set to "true" to disable connection limits and resource management |
+| **LOCALAI_P2P_TOKEN** | Set the token for the p2p network |
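+
+For example, to keep the p2p layer LAN-only (mDNS discovery, no DHT), the
+variable can be set when starting both the server and the workers. A minimal
+sketch, assuming the worker setup described earlier in this page (adjust the
+subcommands and token handling to your installation):
+
+```bash
+# Server: discoverable only on the local network
+LOCALAI_P2P_DISABLE_DHT=true ./local-ai run --p2p
+
+# Worker on the same LAN, sharing the same token
+LOCALAI_P2P_DISABLE_DHT=true TOKEN=<token> ./local-ai worker p2p-llama-cpp-rpc
+```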
From 2845baecd56c8caba4022d01d2d5432110a055e9 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 9 Jul 2024 23:13:29 +0200
Subject: [PATCH 3/3] fix(cuda): downgrade default version from 12.5 to 12.4 (#2707)

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/image-pr.yml    |  4 ++--
 .github/workflows/image.yml       | 16 ++++++++--------
 .github/workflows/image_build.yml |  2 +-
 .github/workflows/release.yaml    |  2 +-
 Dockerfile                        |  2 +-
 Makefile                          |  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 2452ef04..3e4d8e4d 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -46,7 +46,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -119,7 +119,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 0a30e46f..73899e15 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -64,7 +64,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11'
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -86,7 +86,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda11-ffmpeg'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -274,7 +274,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-core'
@@ -285,7 +285,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -296,7 +296,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
-            cuda-minor-version: "8"
+            cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda11-ffmpeg-core'
@@ -307,7 +307,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "5"
+            cuda-minor-version: "4"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 53015565..c891d3dd 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "5"
+        default: "4"
         type: string
       platforms:
         description: 'Platforms'
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index efd33f83..86d60921 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -40,7 +40,7 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
         env:
-          CUDA_VERSION: 12-5
+          CUDA_VERSION: 12-4
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
diff --git a/Dockerfile b/Dockerfile
index ac42db5d..907ad54b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=5
+ARG CUDA_MINOR_VERSION=4
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 
diff --git a/Makefile b/Makefile
index e2ae5c49..d4ca1e4f 100644
--- a/Makefile
+++ b/Makefile
@@ -425,7 +425,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=5 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
	ls -liah $(abspath ./tests/e2e-fixtures)
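
Note: with the default pinned back to 12.4, the CUDA version used for a local
container build can still be overridden via the build arguments defined in the
Dockerfile above. A minimal sketch (the image tag is illustrative):

    docker build \
      --build-arg BUILD_TYPE=cublas \
      --build-arg CUDA_MAJOR_VERSION=12 \
      --build-arg CUDA_MINOR_VERSION=4 \
      -t localai:cuda-12.4 .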