LocalAI/pkg/model/loader.go

package model

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"text/template"

	"github.com/rs/zerolog/log"

	gpt2 "github.com/go-skynet/go-gpt2.cpp"
	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
	llama "github.com/go-skynet/go-llama.cpp"
)

// ModelLoader resolves model files against a single directory and keeps the
// models it has loaded, plus their optional prompt templates, cached in
// memory keyed by model name.
type ModelLoader struct {
	// modelPath is the directory model and template files are joined to.
	modelPath string
	// mu is held by the Load*Model methods and by TemplatePrefix while they
	// read or write the maps below.
	mu        sync.Mutex

	// One cache per backend, keyed by the model name passed to the
	// corresponding Load*Model method.
	models            map[string]*llama.LLama
	gptmodels         map[string]*gptj.GPTJ
	gpt2models        map[string]*gpt2.GPT2
	gptstablelmmodels map[string]*gpt2.StableLM

	// promptsTemplates holds the parsed "<model name>.tmpl" templates, keyed
	// by model name.
	promptsTemplates map[string]*template.Template
}

// NewModelLoader returns a ModelLoader that resolves files against modelPath
// and starts with every model and template cache allocated and empty.
func NewModelLoader(modelPath string) *ModelLoader {
	ml := new(ModelLoader)
	ml.modelPath = modelPath

	// Writing to a nil map panics, so every cache is allocated up front.
	ml.models = make(map[string]*llama.LLama)
	ml.gptmodels = make(map[string]*gptj.GPTJ)
	ml.gpt2models = make(map[string]*gpt2.GPT2)
	ml.gptstablelmmodels = make(map[string]*gpt2.StableLM)
	ml.promptsTemplates = make(map[string]*template.Template)

	return ml
}

// ExistsInModelPath reports whether s, joined to the loader's model
// directory, can be stat'ed. Any os.Stat failure, not only "not found", is
// reported as false.
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
	candidate := filepath.Join(ml.modelPath, s)
	if _, err := os.Stat(candidate); err != nil {
		return false
	}
	return true
}

// ListModels returns the names of the candidate model files found directly
// inside the model directory, in the order ioutil.ReadDir reports them.
//
// Entries that are not model files are left out: sub-directories (the
// Load*Model methods join the name to the model directory and load it as a
// file), and files ending in .tmpl, .keep, .yaml or .yml.
//
// When the directory cannot be read, the error is returned together with an
// empty, non-nil slice.
func (ml *ModelLoader) ListModels() ([]string, error) {
	files, err := ioutil.ReadDir(ml.modelPath)
	if err != nil {
		return []string{}, err
	}

	// Suffixes of auxiliary files that live next to the models.
	skippedSuffixes := [...]string{".tmpl", ".keep", ".yaml", ".yml"}
	isAuxiliary := func(name string) bool {
		for _, suffix := range skippedSuffixes {
			if strings.HasSuffix(name, suffix) {
				return true
			}
		}
		return false
	}

	models := make([]string, 0, len(files))
	for _, file := range files {
		// A directory is never a model file, so do not advertise it.
		if file.IsDir() {
			continue
		}

		name := file.Name()
		if isAuxiliary(name) {
			continue
		}

		models = append(models, name)
	}

	return models, nil
}

// TemplatePrefix executes the prompt template cached under modelName with in
// as its data and returns the rendered text.
//
// It fails when no template is cached for modelName or when executing the
// template returns an error; the returned string is empty in both cases.
// The loader's mutex is held for the duration of the call.
func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	tmpl, found := ml.promptsTemplates[modelName]
	if !found {
		return "", fmt.Errorf("no prompt template available")
	}

	rendered := new(bytes.Buffer)
	err := tmpl.Execute(rendered, in)
	if err != nil {
		return "", err
	}

	return rendered.String(), nil
}

// loadTemplateIfExists parses "<modelName>.tmpl" from the model directory and
// caches the result under modelName.
//
// It is a no-op returning nil when a template is already cached for
// modelName or when the template file does not exist. An error is returned
// only when an existing template file cannot be read or parsed; nothing is
// cached in that case. modelFile is accepted but not used.
//
// The method does not lock ml.mu itself; the callers in this file hold it.
func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
	// Nothing to do when a template is already cached under this name.
	if _, cached := ml.promptsTemplates[modelName]; cached {
		return nil
	}

	// A model without a template file is perfectly valid.
	templateName := fmt.Sprintf("%s.tmpl", modelName)
	if !ml.ExistsInModelPath(templateName) {
		return nil
	}

	raw, err := os.ReadFile(filepath.Join(ml.modelPath, templateName))
	if err != nil {
		return err
	}

	// Only a successfully parsed template is stored.
	parsed, err := template.New("prompt").Parse(string(raw))
	if err == nil {
		ml.promptsTemplates[modelName] = parsed
	}
	return err
}

// LoadStableLMModel returns the StableLM model stored as modelName in the
// model directory. The model is loaded on first use and the cached instance
// is returned on later calls.
//
// An error is returned when modelName does not exist in the model directory,
// when "<modelName>.tmpl" exists but cannot be read or parsed, or when
// gpt2.NewStableLM fails to load the file.
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file must be present on disk, even for an already loaded model.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gptstablelmmodels[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	modelFile := filepath.Join(ml.modelPath, modelName)

	// If there is a prompt template, load it before the model: a broken
	// template is then reported without loading the model at all, instead of
	// loading it and discarding the uncached instance on every attempt.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gpt2.NewStableLM(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gptstablelmmodels[modelName] = model
	return model, nil
}

// LoadGPT2Model returns the GPT2 model stored as modelName in the model
// directory. The model is loaded on first use and the cached instance is
// returned on later calls.
//
// An error is returned when modelName does not exist in the model directory,
// when the same name is already loaded as a StableLM model, when
// "<modelName>.tmpl" exists but cannot be read or parsed, or when gpt2.New
// fails to load the file.
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file must be present on disk, even for an already loaded model.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gpt2models[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	// TODO: This needs refactoring, it's really bad to have it in here
	// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
	if _, ok := ml.gptstablelmmodels[modelName]; ok {
		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
		return nil, fmt.Errorf("this model is a GPTStableLM one")
	}

	modelFile := filepath.Join(ml.modelPath, modelName)

	// If there is a prompt template, load it before the model: a broken
	// template is then reported without loading the model at all, instead of
	// loading it and discarding the uncached instance on every attempt.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gpt2.New(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gpt2models[modelName] = model
	return model, nil
}

// LoadGPTJModel returns the GPTJ model stored as modelName in the model
// directory. The model is loaded on first use and the cached instance is
// returned on later calls.
//
// An error is returned when modelName does not exist in the model directory,
// when the same name is already loaded as a GPT2 or StableLM model, when
// "<modelName>.tmpl" exists but cannot be read or parsed, or when gptj.New
// fails to load the file.
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	// The file must be present on disk, even for an already loaded model.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.gptmodels[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	// TODO: This needs refactoring, it's really bad to have it in here
	// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
	if _, ok := ml.gpt2models[modelName]; ok {
		log.Debug().Msgf("Model is GPT2: %s", modelName)
		return nil, fmt.Errorf("this model is a GPT2 one")
	}
	if _, ok := ml.gptstablelmmodels[modelName]; ok {
		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
		return nil, fmt.Errorf("this model is a GPTStableLM one")
	}

	modelFile := filepath.Join(ml.modelPath, modelName)

	// If there is a prompt template, load it before the model: a broken
	// template is then reported without loading the model at all, instead of
	// loading it and discarding the uncached instance on every attempt.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := gptj.New(modelFile)
	if err != nil {
		return nil, err
	}

	ml.gptmodels[modelName] = model
	return model, nil
}

// LoadLLaMAModel returns the LLaMA model stored as modelName in the model
// directory. The model is loaded on first use with opts passed to llama.New;
// later calls return the cached instance and do not use opts.
//
// An error is returned when modelName does not exist in the model directory,
// when the same name is already loaded as a GPTJ, GPT2 or StableLM model,
// when "<modelName>.tmpl" exists but cannot be read or parsed, or when
// llama.New fails to load the file.
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
	ml.mu.Lock()
	defer ml.mu.Unlock()

	log.Debug().Msgf("Loading model name: %s", modelName)

	// The file must be present on disk, even for an already loaded model.
	if !ml.ExistsInModelPath(modelName) {
		return nil, fmt.Errorf("model does not exist")
	}

	// Check if we already have a loaded model
	if m, ok := ml.models[modelName]; ok {
		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
		return m, nil
	}

	// TODO: This needs refactoring, it's really bad to have it in here
	// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
	if _, ok := ml.gptmodels[modelName]; ok {
		log.Debug().Msgf("Model is GPTJ: %s", modelName)
		return nil, fmt.Errorf("this model is a GPTJ one")
	}
	if _, ok := ml.gpt2models[modelName]; ok {
		log.Debug().Msgf("Model is GPT2: %s", modelName)
		return nil, fmt.Errorf("this model is a GPT2 one")
	}
	if _, ok := ml.gptstablelmmodels[modelName]; ok {
		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
		return nil, fmt.Errorf("this model is a GPTStableLM one")
	}

	modelFile := filepath.Join(ml.modelPath, modelName)

	// If there is a prompt template, load it before the model: a broken
	// template is then reported without loading the model at all, instead of
	// loading it and discarding the uncached instance on every attempt.
	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
		return nil, err
	}

	// Load the model and keep it in memory for later use
	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

	model, err := llama.New(modelFile, opts...)
	if err != nil {
		return nil, err
	}

	ml.models[modelName] = model
	return model, nil
}