package model
import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/mudler/LocalAI/pkg/utils"

	"github.com/rs/zerolog/log"
)

// new idea: what if we declare a struct of these here, and use a loop to check?

// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl.
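// ModelLoader loads backend model instances from ModelPath and keeps them in
// memory, guarding the shared model map with a single mutex.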
type ModelLoader struct {
	ModelPath string
	mu        sync.Mutex
	models    map[string]*Model
	wd        *WatchDog
}

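// NewModelLoader returns a ModelLoader rooted at modelPath with an empty
// model map; a WatchDog can be attached later with SetWatchDog.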
func NewModelLoader(modelPath string) *ModelLoader {
	nml := &ModelLoader{
		ModelPath: modelPath,
		models:    make(map[string]*Model),
	}

	return nml
}

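// SetWatchDog attaches a WatchDog that is handed to the backends' gRPC
// clients when they are created or queried.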
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}

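// ExistsInModelPath reports whether the given file name exists inside ModelPath.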
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
	return utils.ExistsInPath(ml.ModelPath, s)
}

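// knownFilesToSkip lists exact file names (matched case-insensitively) that
// are never treated as models.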
var knownFilesToSkip []string = []string{
	"MODEL_CARD",
	"README",
	"README.md",
}

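// knownModelsNameSuffixToSkip lists file name suffixes that are never treated
// as models (templates, configs, docs, partial downloads, archives, ...).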
var knownModelsNameSuffixToSkip []string = []string{
	".tmpl",
	".keep",
	".yaml",
	".yml",
	".json",
	".txt",
	".md",
	".MD",
	".DS_Store",
	".",
	".safetensors",
	".partial",
	".tar.gz",
}

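// retryTimeout caps the back-off between busy checks in ShutdownModel.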
const retryTimeout = time.Duration(2 * time.Minute)

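// ListFilesInModelPath returns the plain files in ModelPath that look like
// model files, filtering out directories and the known skip lists above.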
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
	files, err := os.ReadDir(ml.ModelPath)
	if err != nil {
		return []string{}, err
	}

	models := []string{}
FILE:
	for _, file := range files {
		for _, skip := range knownFilesToSkip {
			if strings.EqualFold(file.Name(), skip) {
				continue FILE
			}
		}

		// Skip anything with a known non-model suffix (templates, configs, docs, partial downloads, ...)
		for _, skip := range knownModelsNameSuffixToSkip {
			if strings.HasSuffix(file.Name(), skip) {
				continue FILE
			}
		}

		// Skip directories
		if file.IsDir() {
			continue
		}

		models = append(models, file.Name())
	}

	return models, nil
}

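// ListModels returns a snapshot of the models currently loaded in memory.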
func (ml *ModelLoader) ListModels() []*Model {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	models := []*Model{}
	for _, model := range ml.models {
		models = append(models, model)
	}

	return models
}

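// LoadModel returns the model registered under modelID if it is already
// loaded, otherwise it invokes loader(modelID, modelName, modelFile) and
// caches the result. A minimal usage sketch (myLoader is a hypothetical
// backend-specific loader function, not part of this package):
//
//	ml := NewModelLoader("/models")
//	model, err := ml.LoadModel("my-model", "my-model.gguf", myLoader)
//	if err != nil {
//		// handle error
//	}
//	_ = model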
func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string, string, string) (*Model, error)) (*Model, error) {
	// Check if we already have a loaded model
	if model := ml.CheckIsLoaded(modelID); model != nil {
		return model, nil
	}

	// Load the model and keep it in memory for later use
	modelFile := filepath.Join(ml.ModelPath, modelName)
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	ml.mu.Lock()
	defer ml.mu.Unlock()
	model, err := loader(modelID, modelName, modelFile)
	if err != nil {
		return nil, fmt.Errorf("failed to load model with internal loader: %s", err)
	}

	if model == nil {
		return nil, fmt.Errorf("loader didn't return a model")
	}

	ml.models[modelID] = model

	return model, nil
}

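// ShutdownModel waits (with a capped back-off) for the named model's backend
// to become idle, then tears it down via deleteProcess. Setting
// LOCALAI_FORCE_BACKEND_SHUTDOWN=true forces the teardown after 10 retries.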
func (ml *ModelLoader) ShutdownModel(modelName string) error {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	model, ok := ml.models[modelName]
	if !ok {
		return fmt.Errorf("model %s not found", modelName)
	}

	retries := 1
	for model.GRPC(false, ml.wd).IsBusy() {
		log.Debug().Msgf("%s busy. Waiting.", modelName)
		dur := time.Duration(retries*2) * time.Second
		if dur > retryTimeout {
			dur = retryTimeout
		}
		time.Sleep(dur)
		retries++

		if retries > 10 && os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true" {
			log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", modelName, retries)
			break
		}
	}

	return ml.deleteProcess(modelName)
}

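// CheckIsLoaded returns the in-memory model for s if it exists and its gRPC
// backend still answers health checks; if the backend process has died, the
// stale process is deleted and nil is returned so callers can reload it.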
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
	ml.mu.Lock()
	defer ml.mu.Unlock()
	m, ok := ml.models[s]
	if !ok {
		return nil
	}

	log.Debug().Msgf("Model already loaded in memory: %s", s)
	client := m.GRPC(false, ml.wd)

	log.Debug().Msgf("Checking model availability (%s)", s)
	cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	alive, err := client.HealthCheck(cTimeout)
	if !alive {
		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
		log.Warn().Msgf("Deleting the process in order to recreate it")
		process := m.Process()
		if process == nil {
			log.Error().Msgf("Process not found for '%s' and the model is not responding anymore!", s)
			return m
		}
		if !process.IsAlive() {
			log.Debug().Msgf("GRPC Process is not responding: %s", s)
			// Stop and delete the process; this forces the model to be reloaded and the service recreated on next use
			err := ml.deleteProcess(s)
			if err != nil {
				log.Error().Err(err).Str("process", s).Msg("error stopping process")
			}
			return nil
		}
	}

	return m
}