From 96306a39a05894dee9ceb6a97f4215f45d359559 Mon Sep 17 00:00:00 2001
From: mintyleaf
Date: Sat, 18 Jan 2025 11:58:38 +0400
Subject: [PATCH] chore(docs): extra-Usage and Machine-Tag docs (#4627)

Rename LocalAI-Extra-Usage -> Extra-Usage, add MACHINE_TAG as a CLI flag
option, and add docs about Extra-Usage and Machine-Tag.

Signed-off-by: mintyleaf
---
 core/cli/run.go                              |  2 +-
 core/http/endpoints/openai/chat.go           |  2 +-
 core/http/endpoints/openai/completion.go     |  2 +-
 core/http/endpoints/openai/edit.go           |  2 +-
 docs/content/docs/advanced/advanced-usage.md | 31 +++++++++++++++++++-
 5 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/core/cli/run.go b/core/cli/run.go
index b86fe2a6..279ff94b 100644
--- a/core/cli/run.go
+++ b/core/cli/run.go
@@ -70,7 +70,7 @@ type RunCMD struct {
 	WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
 	Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
 	DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
-	MachineTag string `env:"LOCALAI_MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
+	MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"`
 	LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
 }

diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index cbce369a..3b8d3056 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -182,7 +182,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 	c.Set("X-Correlation-ID", correlationID)

 	// Opt-in extra usage flag
-	extraUsage := c.Get("LocalAI-Extra-Usage", "") != ""
+	extraUsage := c.Get("Extra-Usage", "") != ""

 	modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
 	if err != nil {
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index 339e9bc2..a353a0a1 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -67,7 +67,7 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, e
 	c.Set("X-Correlation-ID", id)

 	// Opt-in extra usage flag
-	extraUsage := c.Get("LocalAI-Extra-Usage", "") != ""
+	extraUsage := c.Get("Extra-Usage", "") != ""

 	modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 	if err != nil {
diff --git a/core/http/endpoints/openai/edit.go b/core/http/endpoints/openai/edit.go
index e10a12d1..28a3597c 100644
--- a/core/http/endpoints/openai/edit.go
+++ b/core/http/endpoints/openai/edit.go
@@ -26,7 +26,7 @@ func EditEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, evaluat
 	return func(c *fiber.Ctx) error {
 		// Opt-in extra usage flag
-		extraUsage := c.Get("LocalAI-Extra-Usage", "") != ""
+		extraUsage := c.Get("Extra-Usage", "") != ""

 		modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
 		if err != nil {
diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md
index 35d3a2e4..dd9894ef 100644
--- a/docs/content/docs/advanced/advanced-usage.md
+++ b/docs/content/docs/advanced/advanced-usage.md
@@ -520,6 +520,7 @@ In the help text below, BASEPATH is the location that local-ai is being executed
 | --upload-limit | 15 | Default upload-limit in MB | $LOCALAI_UPLOAD_LIMIT |
 | --api-keys | API-KEYS,... | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys | $LOCALAI_API_KEY |
 | --disable-welcome | | Disable welcome pages | $LOCALAI_DISABLE_WELCOME |
+| --machine-tag | | If not empty, add a Machine-Tag header with this value to each response. Useful to track responses from different machines when running multiple P2P federated nodes | $LOCALAI_MACHINE_TAG |

 #### Backend Flags
 | Parameter | Default | Description | Environment Variable |
@@ -553,6 +554,34 @@
 LOCALAI_MODELS_PATH=/mnt/storage/localai/models
 LOCALAI_F16=true
 ```

+### Request headers
+
+You can set the `Extra-Usage` request header to receive inference timings in milliseconds in the response. Only the header's presence is checked, so any non-empty value works (for example `Extra-Usage: true`); the default OpenAI response model is then extended with two extra fields in `usage`:
+```
+...
+{
+  "id": "...",
+  "created": ...,
+  "model": "...",
+  "choices": [
+    {
+      ...
+    },
+    ...
+  ],
+  "object": "...",
+  "usage": {
+    "prompt_tokens": ...,
+    "completion_tokens": ...,
+    "total_tokens": ...,
+    // with the Extra-Usage header set, these two float fields are included:
+    "timing_prompt_processing": ...,
+    "timing_token_generation": ...
+  },
+}
+...
+```
+
 ### Extra backends

 LocalAI can be extended with extra backends. The backends are implemented as `gRPC` services and can be written in any language. The container images that are built and published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) contain a set of images split in core and extra. By default, images bring all the dependencies and backends supported by LocalAI (we call those `extra` images). The `-core` images instead bring only the strictly necessary dependencies to run LocalAI, with only a core set of backends.
@@ -616,4 +645,4 @@ Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle.

 LocalAI will automatically discover the CPU flagset available in your host and will use the most optimized version of the backends.

-If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
\ No newline at end of file
+If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
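
As a quick client-side check of both changes, something like the following Go sketch could be used (not part of the patch). It opts in to the extended usage via the `Extra-Usage` header and prints the `Machine-Tag` response header, which is only present when the server was started with `--machine-tag` (or `LOCALAI_MACHINE_TAG`/`MACHINE_TAG`). The base URL assumes LocalAI's default `localhost:8080` address, and the model name `gpt-4` is a placeholder to adjust for your deployment.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// usage mirrors the extended "usage" object documented above; the two
// timing fields are only filled in when the Extra-Usage request header
// is present.
type usage struct {
	PromptTokens           int     `json:"prompt_tokens"`
	CompletionTokens       int     `json:"completion_tokens"`
	TotalTokens            int     `json:"total_tokens"`
	TimingPromptProcessing float64 `json:"timing_prompt_processing"`
	TimingTokenGeneration  float64 `json:"timing_token_generation"`
}

type chatResponse struct {
	Usage usage `json:"usage"`
}

func main() {
	// Placeholder model name; use one that your instance actually serves.
	body := []byte(`{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}`)

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// The endpoints only check for presence, so any non-empty value opts in.
	req.Header.Set("Extra-Usage", "true")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Empty unless the server was started with a machine tag.
	fmt.Println("Machine-Tag:", resp.Header.Get("Machine-Tag"))

	var out chatResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Printf("timings (ms): prompt_processing=%v token_generation=%v\n",
		out.Usage.TimingPromptProcessing, out.Usage.TimingTokenGeneration)
}
```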