2023-04-11 22:02:39 +00:00
package model
2023-04-07 09:30:59 +00:00
import (
2023-04-08 08:46:51 +00:00
"bytes"
2023-07-14 23:19:43 +00:00
"context"
2023-04-07 09:30:59 +00:00
"fmt"
"os"
"path/filepath"
2023-04-10 10:02:40 +00:00
"strings"
2023-04-07 09:30:59 +00:00
"sync"
2023-04-08 08:46:51 +00:00
"text/template"
2023-04-07 09:30:59 +00:00
2024-03-21 00:12:20 +00:00
"github.com/Masterminds/sprig/v3"
2024-01-05 17:04:46 +00:00
grammar "github.com/go-skynet/LocalAI/pkg/grammar"
2023-07-14 23:19:43 +00:00
"github.com/go-skynet/LocalAI/pkg/grpc"
2023-07-14 23:19:43 +00:00
process "github.com/mudler/go-processmanager"
2023-05-10 23:12:58 +00:00
"github.com/rs/zerolog/log"
2023-04-07 09:30:59 +00:00
)
2023-07-22 15:31:39 +00:00
// Rather than pass an interface{} to the prompt template:
// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file
// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values.
type PromptTemplateData struct {
2023-08-02 22:19:55 +00:00
SystemPrompt string
SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
Input string
Instruction string
Functions [ ] grammar . Function
MessageIndex int
2023-07-22 15:31:39 +00:00
}
// TODO: Ask mudler about FunctionCall stuff being useful at the message level?
type ChatMessageTemplateData struct {
	// SystemPrompt is the system prompt text exposed to the template.
	SystemPrompt string

	// Role and RoleName both describe the author of the message; how the two
	// differ is decided by the caller (TODO confirm).
	Role     string
	RoleName string

	// FunctionName is presumably the name of the function this message
	// relates to (TODO confirm against the callers).
	FunctionName string

	// Content is the message text.
	Content string

	// MessageIndex is presumably the position of this message in the
	// conversation (TODO confirm against the callers).
	MessageIndex int

	// Function and FunctionCall carry function-calling details. FunctionCall
	// is untyped, so the template must cope with whatever the caller stores.
	Function     bool
	FunctionCall interface{}

	// LastMessage is presumably set on the final message of the conversation
	// (TODO confirm against the callers).
	LastMessage bool
}
// Keep this in sync with config.TemplateConfig. Is there a more idiomatic way to accomplish this in go?
// Technically, order doesn't _really_ matter, but the count must stay in sync, see tests/integration/reflect_test.go
type TemplateType int

const (
	// ChatPromptTemplate is the first TemplateType and has the value 0; the
	// remaining values follow consecutively in declaration order.
	ChatPromptTemplate TemplateType = iota
	ChatMessageTemplate
	CompletionPromptTemplate
	EditPromptTemplate
	FunctionsPromptTemplate

	// IntegrationTestTemplate is **NOT** a valid value and MUST be last. It
	// exists to make the sanity integration tests simpler, and doubles as the
	// exclusive upper bound when iterating over the valid template types.
	IntegrationTestTemplate
)
// new idea: what if we declare a struct of these here, and use a loop to check?
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
2023-04-07 09:30:59 +00:00
type ModelLoader struct {
2023-04-27 04:18:18 +00:00
ModelPath string
2023-04-20 17:33:36 +00:00
mu sync . Mutex
2023-05-10 13:20:21 +00:00
// TODO: this needs generics
2024-01-23 07:56:36 +00:00
grpcClients map [ string ] grpc . Backend
2023-11-16 07:20:05 +00:00
models map [ string ] ModelAddress
2023-07-22 15:31:39 +00:00
grpcProcesses map [ string ] * process . Process
templates map [ TemplateType ] map [ string ] * template . Template
2023-11-26 17:36:23 +00:00
wd * WatchDog
2023-04-07 09:30:59 +00:00
}
2023-11-16 07:20:05 +00:00
// ModelAddress is the string form of a loaded model's backend address; its
// GRPC method hands it to grpc.NewClient unchanged. The empty string is used
// by the loader to mean "not loaded".
type ModelAddress string
2024-01-23 07:56:36 +00:00
func ( m ModelAddress ) GRPC ( parallel bool , wd * WatchDog ) grpc . Backend {
2023-11-26 17:36:23 +00:00
enableWD := false
if wd != nil {
enableWD = true
}
return grpc . NewClient ( string ( m ) , parallel , wd , enableWD )
2023-11-16 07:20:05 +00:00
}
2023-04-07 09:30:59 +00:00
func NewModelLoader ( modelPath string ) * ModelLoader {
2023-07-22 15:31:39 +00:00
nml := & ModelLoader {
ModelPath : modelPath ,
2024-01-23 07:56:36 +00:00
grpcClients : make ( map [ string ] grpc . Backend ) ,
2023-11-16 07:20:05 +00:00
models : make ( map [ string ] ModelAddress ) ,
2023-07-22 15:31:39 +00:00
templates : make ( map [ TemplateType ] map [ string ] * template . Template ) ,
grpcProcesses : make ( map [ string ] * process . Process ) ,
2023-04-20 22:06:55 +00:00
}
2023-11-26 17:36:23 +00:00
2023-07-22 15:31:39 +00:00
nml . initializeTemplateMap ( )
return nml
2023-04-07 09:30:59 +00:00
}
2023-11-26 17:36:23 +00:00
// SetWatchDog stores wd on the loader. The stored watchdog is the one passed
// to ModelAddress.GRPC when CheckIsLoaded has to create a client. The
// assignment is not guarded by the loader mutex.
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}
2023-04-20 16:33:02 +00:00
func ( ml * ModelLoader ) ExistsInModelPath ( s string ) bool {
2023-07-22 15:31:39 +00:00
return existsInPath ( ml . ModelPath , s )
2023-04-20 16:33:02 +00:00
}
2023-04-10 10:02:40 +00:00
func ( ml * ModelLoader ) ListModels ( ) ( [ ] string , error ) {
2023-07-22 15:31:39 +00:00
files , err := os . ReadDir ( ml . ModelPath )
2023-04-10 10:02:40 +00:00
if err != nil {
return [ ] string { } , err
}
models := [ ] string { }
for _ , file := range files {
2023-07-31 17:14:32 +00:00
// Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method?
if strings . HasSuffix ( file . Name ( ) , ".tmpl" ) || strings . HasSuffix ( file . Name ( ) , ".keep" ) || strings . HasSuffix ( file . Name ( ) , ".yaml" ) || strings . HasSuffix ( file . Name ( ) , ".yml" ) || strings . HasSuffix ( file . Name ( ) , ".json" ) || strings . HasSuffix ( file . Name ( ) , ".DS_Store" ) {
2023-04-20 16:33:02 +00:00
continue
2023-04-10 10:02:40 +00:00
}
2023-04-20 16:33:02 +00:00
models = append ( models , file . Name ( ) )
2023-04-10 10:02:40 +00:00
}
return models , nil
}
2023-11-16 07:20:05 +00:00
func ( ml * ModelLoader ) LoadModel ( modelName string , loader func ( string , string ) ( ModelAddress , error ) ) ( ModelAddress , error ) {
2023-05-10 23:12:58 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
// Check if we already have a loaded model
2023-11-16 07:20:05 +00:00
if model := ml . CheckIsLoaded ( modelName ) ; model != "" {
2023-07-14 23:19:43 +00:00
return model , nil
2023-04-19 15:10:29 +00:00
}
2023-04-20 16:33:02 +00:00
2023-04-19 15:10:29 +00:00
// Load the model and keep it in memory for later use
2023-04-27 04:18:18 +00:00
modelFile := filepath . Join ( ml . ModelPath , modelName )
2023-04-20 16:33:02 +00:00
log . Debug ( ) . Msgf ( "Loading model in memory from file: %s" , modelFile )
2023-08-07 20:39:10 +00:00
model , err := loader ( modelName , modelFile )
2023-04-19 15:10:29 +00:00
if err != nil {
2023-11-16 07:20:05 +00:00
return "" , err
2023-04-19 15:10:29 +00:00
}
2023-07-22 15:31:39 +00:00
// TODO: Add a helper method to iterate all prompt templates associated with a config if and only if it's YAML?
// Minor perf loss here until this is fixed, but we initialize on first request
// // If there is a prompt template, load it
// if err := ml.loadTemplateIfExists(modelName); err != nil {
// return nil, err
// }
2023-04-08 08:46:51 +00:00
2023-04-20 16:33:02 +00:00
ml . models [ modelName ] = model
2023-05-11 14:34:16 +00:00
return model , nil
2023-05-05 09:20:06 +00:00
}
2023-07-14 23:19:43 +00:00
2023-08-23 16:38:37 +00:00
func ( ml * ModelLoader ) ShutdownModel ( modelName string ) error {
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2023-11-26 17:36:23 +00:00
2024-03-23 15:19:57 +00:00
return ml . stopModel ( modelName )
2023-11-26 17:36:23 +00:00
}
2024-03-23 15:19:57 +00:00
func ( ml * ModelLoader ) stopModel ( modelName string ) error {
2023-11-26 17:36:23 +00:00
defer ml . deleteProcess ( modelName )
2023-08-23 16:38:37 +00:00
if _ , ok := ml . models [ modelName ] ; ! ok {
return fmt . Errorf ( "model %s not found" , modelName )
}
2023-11-26 17:36:23 +00:00
return nil
//return ml.deleteProcess(modelName)
2023-08-23 16:38:37 +00:00
}
2023-11-16 07:20:05 +00:00
func ( ml * ModelLoader ) CheckIsLoaded ( s string ) ModelAddress {
2024-01-23 07:56:36 +00:00
var client grpc . Backend
2023-07-14 23:19:43 +00:00
if m , ok := ml . models [ s ] ; ok {
log . Debug ( ) . Msgf ( "Model already loaded in memory: %s" , s )
2023-11-16 21:20:16 +00:00
if c , ok := ml . grpcClients [ s ] ; ok {
client = c
} else {
2023-11-26 17:36:23 +00:00
client = m . GRPC ( false , ml . wd )
2023-11-16 21:20:16 +00:00
}
2024-01-07 23:37:02 +00:00
alive , err := client . HealthCheck ( context . Background ( ) )
if ! alive {
log . Warn ( ) . Msgf ( "GRPC Model not responding: %s" , err . Error ( ) )
log . Warn ( ) . Msgf ( "Deleting the process in order to recreate it" )
2023-07-14 23:19:43 +00:00
if ! ml . grpcProcesses [ s ] . IsAlive ( ) {
2023-07-22 15:31:39 +00:00
log . Debug ( ) . Msgf ( "GRPC Process is not responding: %s" , s )
2023-07-14 23:19:43 +00:00
// stop and delete the process, this forces to re-load the model and re-create again the service
2023-08-18 23:49:33 +00:00
ml . deleteProcess ( s )
2023-11-16 07:20:05 +00:00
return ""
2023-07-14 23:19:43 +00:00
}
}
return m
}
2023-11-16 07:20:05 +00:00
return ""
2023-07-14 23:19:43 +00:00
}
2023-07-22 15:31:39 +00:00
// EvaluateTemplateForPrompt renders the template templateName of the given
// type with in as its data and returns the rendered text.
//
// ChatMessageTemplate is rejected with an error because chat messages use a
// different data type; see EvaluateTemplateForChatMessage. Every other
// templateType is passed to evaluateTemplate as is.
func (ml *ModelLoader) EvaluateTemplateForPrompt(templateType TemplateType, templateName string, in PromptTemplateData) (string, error) {
	// TODO: should this check be improved?
	switch templateType {
	case ChatMessageTemplate:
		return "", fmt.Errorf("invalid templateType: ChatMessage")
	default:
		return ml.evaluateTemplate(templateType, templateName, in)
	}
}
// EvaluateTemplateForChatMessage renders the chat-message template
// templateName with messageData and returns the rendered text. The template
// type is always ChatMessageTemplate.
func (ml *ModelLoader) EvaluateTemplateForChatMessage(templateName string, messageData ChatMessageTemplateData) (string, error) {
	rendered, err := ml.evaluateTemplate(ChatMessageTemplate, templateName, messageData)
	return rendered, err
}
// existsInPath reports whether os.Stat succeeds for s joined onto path.
// Every stat failure, whatever its cause, is reported as false.
func existsInPath(path string, s string) bool {
	if _, err := os.Stat(filepath.Join(path, s)); err != nil {
		return false
	}
	return true
}
// initializeTemplateMap installs an empty template cache for every valid
// TemplateType. ml.templates itself must already be allocated.
func (ml *ModelLoader) initializeTemplateMap() {
	// IntegrationTestTemplate is the "one past the end" sentinel, so it is the
	// exclusive bound here and gets no cache of its own. Clunky, but it works.
	for tt := ChatPromptTemplate; tt < IntegrationTestTemplate; tt++ {
		ml.templates[tt] = map[string]*template.Template{}
	}
}
// evaluateTemplate executes the cached template (templateType, templateName)
// against in and returns the output.
//
// The loader mutex is held for the whole call. A template that is not cached
// yet is loaded on demand through loadTemplateIfExists; load errors and
// execution errors are returned unchanged. If no template is available after
// loading, a "failed loading a template" error is returned.
func (ml *ModelLoader) evaluateTemplate(templateType TemplateType, templateName string, in interface{}) (string, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	tmpl, cached := ml.templates[templateType][templateName]
	if !cached {
		if err := ml.loadTemplateIfExists(templateType, templateName); err != nil {
			return "", err
		}
		// No need for the second "ok" here: a missing entry yields nil, which
		// is caught right below.
		tmpl = ml.templates[templateType][templateName]
	}
	if tmpl == nil {
		return "", fmt.Errorf("failed loading a template for %s", templateName)
	}

	var out bytes.Buffer
	if err := tmpl.Execute(&out, in); err != nil {
		return "", err
	}
	return out.String(), nil
}
func ( ml * ModelLoader ) loadTemplateIfExists ( templateType TemplateType , templateName string ) error {
// Check if the template was already loaded
if _ , ok := ml . templates [ templateType ] [ templateName ] ; ok {
return nil
}
// Check if the model path exists
// skip any error here - we run anyway if a template does not exist
modelTemplateFile := fmt . Sprintf ( "%s.tmpl" , templateName )
2023-12-18 17:58:44 +00:00
dat := ""
if ml . ExistsInModelPath ( modelTemplateFile ) {
d , err := os . ReadFile ( filepath . Join ( ml . ModelPath , modelTemplateFile ) )
if err != nil {
return err
}
dat = string ( d )
} else {
dat = templateName
2023-07-22 15:31:39 +00:00
}
// Parse the template
2024-03-21 00:12:20 +00:00
tmpl , err := template . New ( "prompt" ) . Funcs ( sprig . FuncMap ( ) ) . Parse ( dat )
2023-07-22 15:31:39 +00:00
if err != nil {
return err
}
ml . templates [ templateType ] [ templateName ] = tmpl
return nil
}