LocalAI/core/http/routes/localai.go
Ettore Di Giacinto 2d64269763
feat: Add backend gallery (#5607)
* feat: Add backend gallery

This PR adds support for managing backends in a similar way to models. A
backend gallery is now available and can be used to install and remove
extra backends.
The backend gallery can be configured similarly to a model gallery, and
API calls allow installing and removing backends at runtime as well as
during the startup phase of LocalAI.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
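
As an illustration of the new API surface, below is a minimal Go client
sketch that exercises the backend gallery endpoints added by this PR.
The payload shape ({"id": ...}), the gallery identifier and the listen
address are assumptions made for the example, not details taken from
this PR.

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Assumed payload shape: an "id" pointing at a backend in a configured gallery.
	payload := bytes.NewBufferString(`{"id": "example-gallery@example-backend"}`)

	// POST /backends/apply schedules the installation of a backend.
	resp, err := http.Post("http://localhost:8080/backends/apply", "application/json", payload)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("apply:", resp.Status)

	// GET /backends lists the backends that are currently installed.
	list, err := http.Get("http://localhost:8080/backends")
	if err != nil {
		panic(err)
	}
	defer list.Body.Close()
	fmt.Println("list:", list.Status)
}

Jobs started through /backends/apply can then be polled via
/backends/jobs/:uuid, mirroring the model gallery flow.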

* Add backends docs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* wip: Backend Dockerfile for python backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* feat: drop extras images, build python backends separately

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixup on all backends

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* test CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Tweaks

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Drop old backends leftovers

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixup CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Move Dockerfile up

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fix proto

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Feature dropped for consistency - we prefer model galleries

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add missing packages in the build image

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* exllama is only available on cublas

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* pin torch on chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Fixups to index

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Debug CI

* Install accelerator deps

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Add target arch

* Add cuda minor version

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Use self-hosted runners

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* ci: use quay for test images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fixups for vllm and chatterbox

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Small fixups on CI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chatterbox is only available for nvidia

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Simplify CI builds

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Adapt test, use qwen3

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* chore(model gallery): add jina-reranker-v1-tiny-en-gguf

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
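
For context, the usual way to turn such a panic into an ordinary error
in Go is a deferred recover(). The sketch below shows the pattern in
isolation; parseGGUF is a hypothetical stand-in for the real gguf-parser
call, not its actual API.

package main

import "fmt"

// parseGGUF is a hypothetical stand-in for the real parser call; it
// simulates the kind of panic a malformed file could trigger.
func parseGGUF(path string) error {
	panic("malformed gguf header")
}

// safeParse converts a parser panic into an ordinary error via a
// deferred recover, so a single bad file cannot crash the caller.
func safeParse(path string) (err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic while reading gguf %q: %v", path, r)
		}
	}()
	return parseGGUF(path)
}

func main() {
	fmt.Println(safeParse("model.gguf"))
}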

* Use reranker from llama.cpp in AIO images

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* Limit concurrent jobs

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2025-06-15 14:56:52 +02:00

103 lines · 4.7 KiB · Go

package routes

import (
	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/swagger"
	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/core/http/endpoints/localai"
	"github.com/mudler/LocalAI/core/http/middleware"
	"github.com/mudler/LocalAI/core/p2p"
	"github.com/mudler/LocalAI/core/schema"
	"github.com/mudler/LocalAI/core/services"
	"github.com/mudler/LocalAI/internal"
	"github.com/mudler/LocalAI/pkg/model"
)

func RegisterLocalAIRoutes(router *fiber.App,
	requestExtractor *middleware.RequestExtractor,
	cl *config.BackendConfigLoader,
	ml *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	galleryService *services.GalleryService) {

	router.Get("/swagger/*", swagger.HandlerDefault) // default

	// LocalAI API endpoints
	if !appConfig.DisableGalleryEndpoint {
		modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
		router.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint())
		router.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint())
		router.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint())
		router.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint())
		router.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint())
		router.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint())

		backendGalleryEndpointService := localai.CreateBackendEndpointService(appConfig.BackendGalleries, appConfig.BackendsPath, galleryService)
		router.Post("/backends/apply", backendGalleryEndpointService.ApplyBackendEndpoint())
		router.Post("/backends/delete/:name", backendGalleryEndpointService.DeleteBackendEndpoint())
		router.Get("/backends", backendGalleryEndpointService.ListBackendsEndpoint())
		router.Get("/backends/available", backendGalleryEndpointService.ListAvailableBackendsEndpoint())
		router.Get("/backends/galleries", backendGalleryEndpointService.ListBackendGalleriesEndpoint())
		router.Get("/backends/jobs/:uuid", backendGalleryEndpointService.GetOpStatusEndpoint())
	}

	router.Post("/tts",
		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)),
		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }),
		localai.TTSEndpoint(cl, ml, appConfig))

	vadChain := []fiber.Handler{
		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VAD)),
		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VADRequest) }),
		localai.VADEndpoint(cl, ml, appConfig),
	}
	router.Post("/vad", vadChain...)
	router.Post("/v1/vad", vadChain...)

	// Stores
	router.Post("/stores/set", localai.StoresSetEndpoint(ml, appConfig))
	router.Post("/stores/delete", localai.StoresDeleteEndpoint(ml, appConfig))
	router.Post("/stores/get", localai.StoresGetEndpoint(ml, appConfig))
	router.Post("/stores/find", localai.StoresFindEndpoint(ml, appConfig))

	if !appConfig.DisableMetrics {
		router.Get("/metrics", localai.LocalAIMetricsEndpoint())
	}

	router.Post("/video",
		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)),
		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.VideoRequest) }),
		localai.VideoEndpoint(cl, ml, appConfig))

	// Backend Statistics Module
	// TODO: Should these use standard middlewares? Refactor later, they are extremely simple.
	backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
	router.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
	router.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))
	// The v1/* urls are exactly the same as above - makes local e2e testing easier if they are registered.
	router.Get("/v1/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService))
	router.Post("/v1/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService))

	// p2p
	if p2p.IsP2PEnabled() {
		router.Get("/api/p2p", localai.ShowP2PNodes(appConfig))
		router.Get("/api/p2p/token", localai.ShowP2PToken(appConfig))
	}

	router.Get("/version", func(c *fiber.Ctx) error {
		return c.JSON(struct {
			Version string `json:"version"`
		}{Version: internal.PrintableVersion()})
	})

	router.Get("/system", localai.SystemInformations(ml, appConfig))

	// misc
	router.Post("/v1/tokenize",
		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }),
		localai.TokenizeEndpoint(cl, ml, appConfig))
}
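
For readers unfamiliar with Fiber, the following self-contained sketch
reproduces the registration pattern used for the /version route above.
It is illustrative only: the hardcoded version string stands in for
internal.PrintableVersion(), and none of LocalAI's middleware, loaders
or services are wired up.

package main

import "github.com/gofiber/fiber/v2"

func main() {
	app := fiber.New()

	// Same pattern as the /version route in this file: a handler that
	// serializes a small anonymous struct to JSON.
	app.Get("/version", func(c *fiber.Ctx) error {
		return c.JSON(struct {
			Version string `json:"version"`
		}{Version: "v0.0.0-example"}) // placeholder for internal.PrintableVersion()
	})

	if err := app.Listen(":8080"); err != nil {
		panic(err)
	}
}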