2023-04-11 22:02:39 +00:00
package model
2023-04-07 09:30:59 +00:00
import (
2023-07-14 23:19:43 +00:00
"context"
2023-04-07 09:30:59 +00:00
"fmt"
"os"
"path/filepath"
2023-04-10 10:02:40 +00:00
"strings"
2023-04-07 09:30:59 +00:00
"sync"
2024-08-30 13:20:39 +00:00
"time"
2023-04-07 09:30:59 +00:00
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/pkg/templates"
2024-04-19 02:40:18 +00:00
2024-06-23 08:24:36 +00:00
"github.com/mudler/LocalAI/pkg/utils"
2024-04-19 02:40:18 +00:00
2023-05-10 23:12:58 +00:00
"github.com/rs/zerolog/log"
2023-04-07 09:30:59 +00:00
)
2023-07-22 15:31:39 +00:00
// new idea: what if we declare a struct of these here, and use a loop to check?
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we seperate directories for .bin/.yaml and .tmpl
2023-04-07 09:30:59 +00:00
// ModelLoader tracks loaded models keyed by model ID and owns the
// prompt-template cache rooted at the same model directory. A single
// mutex guards the model map (shared with template access for now —
// see the TODO above about splitting the loaders).
type ModelLoader struct {
	ModelPath string                   // base directory containing the model files
	mu        sync.Mutex               // guards models
	models    map[string]*Model        // loaded models, keyed by model ID
	templates *templates.TemplateCache // template cache rooted at ModelPath
	wd        *WatchDog                // optional watchdog handed to gRPC clients; may be nil (see SetWatchDog)
}
func NewModelLoader ( modelPath string ) * ModelLoader {
2023-07-22 15:31:39 +00:00
nml := & ModelLoader {
2024-09-26 10:44:55 +00:00
ModelPath : modelPath ,
models : make ( map [ string ] * Model ) ,
templates : templates . NewTemplateCache ( modelPath ) ,
2023-04-20 22:06:55 +00:00
}
2023-11-26 17:36:23 +00:00
2023-07-22 15:31:39 +00:00
return nml
2023-04-07 09:30:59 +00:00
}
2023-11-26 17:36:23 +00:00
// SetWatchDog sets the watchdog that is later handed to model gRPC
// clients (see ShutdownModel and CheckIsLoaded).
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}
2023-04-20 16:33:02 +00:00
// ExistsInModelPath reports whether a file named s exists inside the
// loader's model directory.
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
	return utils.ExistsInPath(ml.ModelPath, s)
}
2024-07-10 13:28:39 +00:00
// knownFilesToSkip lists exact file names (matched case-insensitively)
// that ListFilesInModelPath never treats as models.
var knownFilesToSkip = []string{
	"MODEL_CARD",
	"README",
	"README.md",
}
// knownModelsNameSuffixToSkip lists file-name suffixes that
// ListFilesInModelPath never treats as models: templates, YAML/JSON
// configs, docs, partial downloads, archives, and files ending in a
// dot. Note entries like ".DS_Store" also match the exact file name,
// since the whole name is its own suffix.
var knownModelsNameSuffixToSkip = []string{
	".tmpl",
	".keep",
	".yaml",
	".yml",
	".json",
	".txt",
	".md",
	".MD",
	".DS_Store",
	".",
	".safetensors",
	".partial",
	".tar.gz",
}
2024-09-17 04:50:57 +00:00
const retryTimeout = time . Duration ( 2 * time . Minute )
2024-07-10 13:28:39 +00:00
func ( ml * ModelLoader ) ListFilesInModelPath ( ) ( [ ] string , error ) {
2023-07-22 15:31:39 +00:00
files , err := os . ReadDir ( ml . ModelPath )
2023-04-10 10:02:40 +00:00
if err != nil {
return [ ] string { } , err
}
models := [ ] string { }
2024-07-10 13:28:39 +00:00
FILE :
2023-04-10 10:02:40 +00:00
for _ , file := range files {
2024-07-10 13:28:39 +00:00
for _ , skip := range knownFilesToSkip {
if strings . EqualFold ( file . Name ( ) , skip ) {
continue FILE
}
}
// Skip templates, YAML, .keep, .json, and .DS_Store files
for _ , skip := range knownModelsNameSuffixToSkip {
if strings . HasSuffix ( file . Name ( ) , skip ) {
continue FILE
}
}
// Skip directories
if file . IsDir ( ) {
2023-04-20 16:33:02 +00:00
continue
2023-04-10 10:02:40 +00:00
}
2023-04-20 16:33:02 +00:00
models = append ( models , file . Name ( ) )
2023-04-10 10:02:40 +00:00
}
return models , nil
}
2024-11-14 13:12:29 +00:00
func ( ml * ModelLoader ) ListModels ( ) [ ] * Model {
2024-08-30 13:20:39 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2024-11-14 13:12:29 +00:00
models := [ ] * Model { }
2024-08-30 13:20:39 +00:00
for _ , model := range ml . models {
2024-11-14 13:12:29 +00:00
models = append ( models , model )
2024-08-30 13:20:39 +00:00
}
return models
}
2024-10-02 06:55:58 +00:00
func ( ml * ModelLoader ) LoadModel ( modelID , modelName string , loader func ( string , string , string ) ( * Model , error ) ) ( * Model , error ) {
2023-05-10 23:12:58 +00:00
// Check if we already have a loaded model
2024-10-02 06:55:58 +00:00
if model := ml . CheckIsLoaded ( modelID ) ; model != nil {
2023-07-14 23:19:43 +00:00
return model , nil
2023-04-19 15:10:29 +00:00
}
2023-04-20 16:33:02 +00:00
2023-04-19 15:10:29 +00:00
// Load the model and keep it in memory for later use
2023-04-27 04:18:18 +00:00
modelFile := filepath . Join ( ml . ModelPath , modelName )
2023-04-20 16:33:02 +00:00
log . Debug ( ) . Msgf ( "Loading model in memory from file: %s" , modelFile )
2024-09-26 10:44:55 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2024-10-02 06:55:58 +00:00
model , err := loader ( modelID , modelName , modelFile )
2023-04-19 15:10:29 +00:00
if err != nil {
2024-10-02 18:37:40 +00:00
return nil , fmt . Errorf ( "failed to load model with internal loader: %s" , err )
2023-04-19 15:10:29 +00:00
}
2024-08-30 13:20:39 +00:00
if model == nil {
return nil , fmt . Errorf ( "loader didn't return a model" )
}
2024-10-02 06:55:58 +00:00
ml . models [ modelID ] = model
2024-08-30 13:20:39 +00:00
2023-05-11 14:34:16 +00:00
return model , nil
2023-05-05 09:20:06 +00:00
}
2023-07-14 23:19:43 +00:00
2023-08-23 16:38:37 +00:00
// ShutdownModel stops the backend process serving modelName and
// removes it from the loader. If the backend reports itself busy, it
// waits with a linearly growing sleep (2s, 4s, ... capped at
// retryTimeout) until it goes idle; when the environment variable
// LOCALAI_FORCE_BACKEND_SHUTDOWN=true, waiting is abandoned after 10
// retries and shutdown is forced.
//
// Returns an error if the model is not currently loaded, or whatever
// deleteProcess returns.
func (ml *ModelLoader) ShutdownModel(modelName string) error {
	ml.mu.Lock()
	defer ml.mu.Unlock()
	model, ok := ml.models[modelName]
	if !ok {
		return fmt.Errorf("model %s not found", modelName)
	}

	retries := 1
	// Wait for in-flight work to drain before killing the process.
	// NOTE(review): without the force-shutdown env var this loop can in
	// principle spin forever if the backend never reports idle — and it
	// holds ml.mu the whole time, blocking all other loader calls.
	for model.GRPC(false, ml.wd).IsBusy() {
		log.Debug().Msgf("%s busy. Waiting.", modelName)
		dur := time.Duration(retries*2) * time.Second
		if dur > retryTimeout {
			dur = retryTimeout // cap the backoff
		}
		time.Sleep(dur)
		retries++

		if retries > 10 && os.Getenv("LOCALAI_FORCE_BACKEND_SHUTDOWN") == "true" {
			log.Warn().Msgf("Model %s is still busy after %d retries. Forcing shutdown.", modelName, retries)
			break
		}
	}

	return ml.deleteProcess(modelName)
}
2024-08-25 12:36:09 +00:00
// CheckIsLoaded returns the model registered under s, or nil if none
// is loaded. When a model is found, its gRPC backend is health-checked
// with a 2-minute timeout:
//   - backend healthy: the model is returned;
//   - backend unresponsive and its process has died: the process entry
//     is deleted and nil is returned so the caller re-loads the model;
//   - backend unresponsive but the process is still alive, or no
//     process is tracked: the (possibly broken) model is returned as-is.
func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
	ml.mu.Lock()
	defer ml.mu.Unlock()
	m, ok := ml.models[s]
	if !ok {
		return nil
	}

	log.Debug().Msgf("Model already loaded in memory: %s", s)
	client := m.GRPC(false, ml.wd)

	log.Debug().Msgf("Checking model availability (%s)", s)
	cTimeout, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	alive, err := client.HealthCheck(cTimeout)
	if !alive {
		// NOTE(review): if HealthCheck can return (false, nil) this
		// err.Error() call panics — confirm against the client API.
		log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
		log.Warn().Msgf("Deleting the process in order to recreate it")
		process := m.Process()
		if process == nil {
			// No process handle to inspect or kill; hand back the model
			// even though it is not responding.
			log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
			return m
		}
		if !process.IsAlive() {
			log.Debug().Msgf("GRPC Process is not responding: %s", s)
			// stop and delete the process, this forces to re-load the model and re-create again the service
			err := ml.deleteProcess(s)
			if err != nil {
				log.Error().Err(err).Str("process", s).Msg("error stopping process")
			}
			return nil
		}
	}

	return m
}