mirror of
https://github.com/mudler/LocalAI.git
synced 2025-06-22 16:39:05 +00:00
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, ) (push) Has been cancelled
build container images / core-image-build (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16) (push) Has been cancelled
build container images / core-image-build (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32) (push) Has been cancelled
build container images / core-image-build (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11) (push) Has been cancelled
build container images / core-image-build (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12) (push) Has been cancelled
build container images / core-image-build (-aio-gpu-vulkan, ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan, latest-aio-gpu-vulkan, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64) (push) Has been cancelled
build python backend container images / backend-jobs (bark, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-bark, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-bark) (push) Has been cancelled
build python backend container images / backend-jobs (bark, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-bark, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-bark) (push) Has been cancelled
build python backend container images / backend-jobs (bark, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-bark, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-bark) (push) Has been cancelled
build python backend container images / backend-jobs (bark, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-bark, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-bark) (push) Has been cancelled
build python backend container images / backend-jobs (bark, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-bark, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-bark) (push) Has been cancelled
build python backend container images / backend-jobs (chatterbox, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-chatterbox, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-chatterbox) (push) Has been cancelled
build python backend container images / backend-jobs (chatterbox, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-chatterbox, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-chatterbox) (push) Has been cancelled
build python backend container images / backend-jobs (coqui, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-coqui, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-coqui) (push) Has been cancelled
build python backend container images / backend-jobs (coqui, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-coqui, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-coqui) (push) Has been cancelled
build python backend container images / backend-jobs (coqui, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-coqui, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-coqui) (push) Has been cancelled
build python backend container images / backend-jobs (coqui, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-coqui, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-coqui) (push) Has been cancelled
build python backend container images / backend-jobs (coqui, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-coqui, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-coqui) (push) Has been cancelled
build python backend container images / backend-jobs (diffusers, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-diffusers, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-diffusers) (push) Has been cancelled
build python backend container images / backend-jobs (diffusers, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-diffusers, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-diffusers) (push) Has been cancelled
build python backend container images / backend-jobs (diffusers, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-diffusers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-diffusers) (push) Has been cancelled
build python backend container images / backend-jobs (diffusers, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-diffusers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-diffusers) (push) Has been cancelled
build python backend container images / backend-jobs (faster-whisper, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-faster-whisper, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-faster-whisper) (push) Has been cancelled
build python backend container images / backend-jobs (faster-whisper, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-faster-whisper, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-faster-whisper) (push) Has been cancelled
build python backend container images / backend-jobs (faster-whisper, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-faster-whisper, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-faster-whisper) (push) Has been cancelled
build python backend container images / backend-jobs (faster-whisper, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-faster-whisper, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-faster-whisper) (push) Has been cancelled
build python backend container images / backend-jobs (faster-whisper, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-faster-whisper, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-faster-whisper) (push) Has been cancelled
build python backend container images / backend-jobs (kokoro, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-kokoro, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-kokoro) (push) Has been cancelled
build python backend container images / backend-jobs (kokoro, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-kokoro, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-kokoro) (push) Has been cancelled
build python backend container images / backend-jobs (kokoro, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-kokoro, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-kokoro) (push) Has been cancelled
build python backend container images / backend-jobs (kokoro, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-kokoro, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-kokoro) (push) Has been cancelled
build python backend container images / backend-jobs (kokoro, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-kokoro, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-kokoro) (push) Has been cancelled
build python backend container images / backend-jobs (rerankers, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-rerankers, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-rerankers) (push) Has been cancelled
build python backend container images / backend-jobs (rerankers, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-rerankers, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-rerankers) (push) Has been cancelled
build python backend container images / backend-jobs (rerankers, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-rerankers, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-rerankers) (push) Has been cancelled
build python backend container images / backend-jobs (rerankers, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-rerankers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-rerankers) (push) Has been cancelled
build python backend container images / backend-jobs (rerankers, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-rerankers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-rerankers) (push) Has been cancelled
build python backend container images / backend-jobs (transformers, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-transformers, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-transformers) (push) Has been cancelled
build python backend container images / backend-jobs (transformers, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-transformers, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-transformers) (push) Has been cancelled
build python backend container images / backend-jobs (transformers, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-transformers, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-transformers) (push) Has been cancelled
build python backend container images / backend-jobs (transformers, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-transformers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-transformers) (push) Has been cancelled
build python backend container images / backend-jobs (transformers, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-transformers, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-transformers) (push) Has been cancelled
build python backend container images / backend-jobs (vllm, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, , , latest-gpu-intel-sycl-f16-vllm, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f16-vllm) (push) Has been cancelled
build python backend container images / backend-jobs (vllm, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, , , latest-gpu-intel-sycl-f32-vllm, linux/amd64, arc-runner-set, true, -gpu-intel-sycl-f32-vllm) (push) Has been cancelled
build python backend container images / backend-jobs (vllm, rocm/dev-ubuntu-22.04:6.1, hipblas, , , latest-gpu-rocm-hipblas-vllm, linux/amd64, arc-runner-set, true, -gpu-rocm-hipblas-vllm) (push) Has been cancelled
build python backend container images / backend-jobs (vllm, ubuntu:22.04, cublas, 11, 7, latest-gpu-nvidia-cuda-11-vllm, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-11-vllm) (push) Has been cancelled
build python backend container images / backend-jobs (vllm, ubuntu:22.04, cublas, 12, 0, latest-gpu-nvidia-cuda-12-vllm, linux/amd64, arc-runner-set, true, -gpu-nvidia-cuda-12-vllm) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Update swagger / swagger (push) Has been cancelled
Check if checksums are up-to-date / checksum_check (push) Has been cancelled
Bump dependencies / bump (mudler/LocalAI) (push) Has been cancelled
Bump dependencies / bump (main, PABannier/bark.cpp, BARKCPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggml-org/llama.cpp, CPPLLAMA_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, ggml-org/whisper.cpp, WHISPER_CPP_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, leejet/stable-diffusion.cpp, STABLEDIFFUSION_GGML_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-piper, PIPER_VERSION) (push) Has been cancelled
Bump dependencies / bump (master, mudler/go-stable-diffusion, STABLEDIFFUSION_VERSION) (push) Has been cancelled
* feat: Add backend gallery This PR add support to manage backends as similar to models. There is now available a backend gallery which can be used to install and remove extra backends. The backend gallery can be configured similarly as a model gallery, and API calls allows to install and remove new backends in runtime, and as well during the startup phase of LocalAI. Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add backends docs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * wip: Backend Dockerfile for python backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * feat: drop extras images, build python backends separately Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixup on all backends Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * test CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Tweaks Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Drop old backends leftovers Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixup CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Move dockerfile upper Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fix proto Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Feature dropped for consistency - we prefer model galleries Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add missing packages in the build image Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * exllama is ponly available on cublas Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * pin torch on chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Fixups to index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Debug CI * Install accellerators deps Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Add target arch * Add cuda minor version Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use self-hosted runners Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * ci: use quay for test images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fixups for vllm and chatterbox Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Small fixups on CI Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chatterbox is only available for nvidia Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Simplify CI builds Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Adapt test, use qwen3 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * chore(model gallery): add jina-reranker-v1-tiny-en-gguf Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * fix(gguf-parser): recover from potential panics that can happen while reading ggufs with gguf-parser Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Use reranker from llama.cpp in AIO images Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Limit concurrent jobs Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
219 lines
4.8 KiB
Go
219 lines
4.8 KiB
Go
package model
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"maps"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/pkg/utils"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
)
|
|
|
|
// new idea: what if we declare a struct of these here, and use a loop to check?
|
|
|
|
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
|
|
type ModelLoader struct {
|
|
ModelPath string
|
|
mu sync.Mutex
|
|
singletonLock sync.Mutex
|
|
singletonMode bool
|
|
models map[string]*Model
|
|
wd *WatchDog
|
|
externalBackends map[string]string
|
|
}
|
|
|
|
func NewModelLoader(modelPath string, singleActiveBackend bool) *ModelLoader {
|
|
nml := &ModelLoader{
|
|
ModelPath: modelPath,
|
|
models: make(map[string]*Model),
|
|
singletonMode: singleActiveBackend,
|
|
externalBackends: make(map[string]string),
|
|
}
|
|
|
|
return nml
|
|
}
|
|
|
|
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
|
|
ml.wd = wd
|
|
}
|
|
|
|
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
|
return utils.ExistsInPath(ml.ModelPath, s)
|
|
}
|
|
|
|
func (ml *ModelLoader) SetExternalBackend(name, uri string) {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
ml.externalBackends[name] = uri
|
|
}
|
|
|
|
func (ml *ModelLoader) DeleteExternalBackend(name string) {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
delete(ml.externalBackends, name)
|
|
}
|
|
|
|
func (ml *ModelLoader) GetExternalBackend(name string) string {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
return ml.externalBackends[name]
|
|
}
|
|
|
|
func (ml *ModelLoader) GetAllExternalBackends(o *Options) map[string]string {
|
|
backends := make(map[string]string)
|
|
maps.Copy(backends, ml.externalBackends)
|
|
if o != nil {
|
|
maps.Copy(backends, o.externalBackends)
|
|
}
|
|
return backends
|
|
}
|
|
|
|
var knownFilesToSkip []string = []string{
|
|
"MODEL_CARD",
|
|
"README",
|
|
"README.md",
|
|
}
|
|
|
|
var knownModelsNameSuffixToSkip []string = []string{
|
|
".tmpl",
|
|
".keep",
|
|
".yaml",
|
|
".yml",
|
|
".json",
|
|
".txt",
|
|
".pt",
|
|
".onnx",
|
|
".md",
|
|
".MD",
|
|
".DS_Store",
|
|
".",
|
|
".safetensors",
|
|
".partial",
|
|
".tar.gz",
|
|
}
|
|
|
|
const retryTimeout = time.Duration(2 * time.Minute)
|
|
|
|
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
|
|
files, err := os.ReadDir(ml.ModelPath)
|
|
if err != nil {
|
|
return []string{}, err
|
|
}
|
|
|
|
models := []string{}
|
|
FILE:
|
|
for _, file := range files {
|
|
|
|
for _, skip := range knownFilesToSkip {
|
|
if strings.EqualFold(file.Name(), skip) {
|
|
continue FILE
|
|
}
|
|
}
|
|
|
|
// Skip templates, YAML, .keep, .json, and .DS_Store files
|
|
for _, skip := range knownModelsNameSuffixToSkip {
|
|
if strings.HasSuffix(file.Name(), skip) {
|
|
continue FILE
|
|
}
|
|
}
|
|
|
|
// Skip directories
|
|
if file.IsDir() {
|
|
continue
|
|
}
|
|
|
|
models = append(models, file.Name())
|
|
}
|
|
|
|
return models, nil
|
|
}
|
|
|
|
func (ml *ModelLoader) ListModels() []*Model {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
|
|
models := []*Model{}
|
|
for _, model := range ml.models {
|
|
models = append(models, model)
|
|
}
|
|
|
|
return models
|
|
}
|
|
|
|
func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string, string, string) (*Model, error)) (*Model, error) {
|
|
// Check if we already have a loaded model
|
|
if model := ml.CheckIsLoaded(modelID); model != nil {
|
|
return model, nil
|
|
}
|
|
|
|
// Load the model and keep it in memory for later use
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
model, err := loader(modelID, modelName, modelFile)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to load model with internal loader: %s", err)
|
|
}
|
|
|
|
if model == nil {
|
|
return nil, fmt.Errorf("loader didn't return a model")
|
|
}
|
|
|
|
ml.models[modelID] = model
|
|
|
|
return model, nil
|
|
}
|
|
|
|
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
|
|
return ml.deleteProcess(modelName)
|
|
}
|
|
|
|
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
|
ml.mu.Lock()
|
|
defer ml.mu.Unlock()
|
|
m, ok := ml.models[s]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", s)
|
|
client := m.GRPC(false, ml.wd)
|
|
|
|
log.Debug().Msgf("Checking model availability (%s)", s)
|
|
cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
|
defer cancel()
|
|
|
|
alive, err := client.HealthCheck(cTimeout)
|
|
if !alive {
|
|
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
|
log.Warn().Msgf("Deleting the process in order to recreate it")
|
|
process := m.Process()
|
|
if process == nil {
|
|
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
|
|
return m
|
|
}
|
|
if !process.IsAlive() {
|
|
log.Debug().Msgf("GRPC Process is not responding: %s", s)
|
|
// stop and delete the process, this forces to re-load the model and re-create again the service
|
|
err := ml.deleteProcess(s)
|
|
if err != nil {
|
|
log.Error().Err(err).Str("process", s).Msg("error stopping process")
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return m
|
|
}
|