feat: allow to set a prompt cache path and enable saving state (#395)
Signed-off-by: mudler <mudler@mocaccino.org>
parent 76c881043e
commit 217dbb448e
Makefile (1 addition, 1 deletion)
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
 
-GOLLAMA_VERSION?=8bd97d532e90cf34e755b3ea2d8aa17000443cf2
+GOLLAMA_VERSION?=fbec625895ba0c458f783b62c8569135c5e80d79
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
 GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b
 GOGGMLTRANSFORMERS_VERSION?=c4c581f1853cf1b66276501c7c0dbea1e3e564b7
@@ -34,6 +34,10 @@ type Config struct {
     Mirostat int `yaml:"mirostat"`
     NGPULayers int `yaml:"gpu_layers"`
     ImageGenerationAssets string `yaml:"asset_dir"`
+
+    PromptCachePath string `yaml:"prompt_cache_path"`
+    PromptCacheAll bool `yaml:"prompt_cache_all"`
+
     PromptStrings, InputStrings []string
     InputToken [][]int
 }
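For reference, a minimal, self-contained Go sketch (not part of the commit) of how the two new yaml tags decode from a model's YAML configuration. The field names and tags come from the hunk above; the cut-down struct, the sample values, and the use of gopkg.in/yaml.v2 are assumptions for illustration.

// Decode the new prompt cache settings from a YAML snippet.
package main

import (
    "fmt"

    "gopkg.in/yaml.v2" // assumption: any struct-tag-based YAML decoder behaves the same way
)

// Cut-down Config with only the two fields added by this commit.
type Config struct {
    PromptCachePath string `yaml:"prompt_cache_path"`
    PromptCacheAll  bool   `yaml:"prompt_cache_all"`
}

func main() {
    // Hypothetical model configuration values.
    data := []byte("prompt_cache_path: cache/my-model\nprompt_cache_all: true\n")

    var c Config
    if err := yaml.Unmarshal(data, &c); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", c) // {PromptCachePath:cache/my-model PromptCacheAll:true}
}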
@@ -2,6 +2,8 @@ package api
 
 import (
     "fmt"
+    "os"
+    "path/filepath"
     "regexp"
     "strings"
     "sync"
@@ -102,7 +104,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
     switch model := inferenceModel.(type) {
     case *llama.LLama:
         fn = func() ([]float32, error) {
-            predictOptions := buildLLamaPredictOptions(c)
+            predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
             if len(tokens) > 0 {
                 return model.TokenEmbeddings(tokens, predictOptions...)
             }
@@ -151,7 +153,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
     }, nil
 }
 
-func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
     // Generate the prediction using the language model
     predictOptions := []llama.PredictOption{
         llama.SetTemperature(c.Temperature),
@@ -161,6 +163,17 @@ func buildLLamaPredictOptions(c Config) []llama.PredictOption {
         llama.SetThreads(c.Threads),
     }
 
+    if c.PromptCacheAll {
+        predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
+    }
+
+    if c.PromptCachePath != "" {
+        // Create parent directory
+        p := filepath.Join(modelPath, c.PromptCachePath)
+        os.MkdirAll(filepath.Dir(p), 0755)
+        predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
+    }
+
     if c.Mirostat != 0 {
         predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
     }
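A note on the path handling in the hunk above: the configured prompt_cache_path is resolved relative to the model directory, and only its parent directory is created before llama.cpp writes the cache file. A standalone sketch of that resolution, with hypothetical paths standing in for loader.ModelPath and c.PromptCachePath:

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

func main() {
    modelPath := "/models"              // stands in for loader.ModelPath (hypothetical)
    promptCachePath := "cache/my-model" // stands in for c.PromptCachePath (hypothetical)

    // Same resolution as in the diff: join onto the model directory,
    // then create the parent directory that will hold the cache file.
    p := filepath.Join(modelPath, promptCachePath)              // /models/cache/my-model
    if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil {  // creates /models/cache
        fmt.Println("could not create cache directory:", err)
        return
    }
    fmt.Println("prompt cache file will be written to", p)
}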
@@ -469,7 +482,7 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
             model.SetTokenCallback(tokenCallback)
         }
 
-        predictOptions := buildLLamaPredictOptions(c)
+        predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 
         str, er := model.Predict(
             s,