2023-11-13 21:40:16 +00:00
|
|
|
package main
|
2023-07-14 23:19:43 +00:00
|
|
|
|
|
|
|
// This is a wrapper to satisfy the gRPC service interface
|
|
|
|
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"path/filepath"
|
|
|
|
|
|
|
|
"github.com/donomii/go-rwkv.cpp"
|
2024-06-23 08:24:36 +00:00
|
|
|
"github.com/mudler/LocalAI/pkg/grpc/base"
|
|
|
|
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
|
2023-07-14 23:19:43 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// tokenizerSuffix is appended to the model's base file name to form the
// default tokenizer file name when Load is given no explicit tokenizer.
const tokenizerSuffix = ".tokenizer.json"
|
|
|
|
|
|
|
|
type LLM struct {
|
2023-08-20 12:04:45 +00:00
|
|
|
base.SingleThread
|
2023-07-14 23:19:43 +00:00
|
|
|
|
|
|
|
rwkv *rwkv.RwkvState
|
|
|
|
}
|
|
|
|
|
|
|
|
func (llm *LLM) Load(opts *pb.ModelOptions) error {
|
2023-08-22 16:48:06 +00:00
|
|
|
tokenizerFile := opts.Tokenizer
|
|
|
|
if tokenizerFile == "" {
|
|
|
|
modelFile := filepath.Base(opts.ModelFile)
|
|
|
|
tokenizerFile = modelFile + tokenizerSuffix
|
|
|
|
}
|
2023-08-07 20:39:10 +00:00
|
|
|
modelPath := filepath.Dir(opts.ModelFile)
|
2023-08-22 16:48:06 +00:00
|
|
|
tokenizerPath := filepath.Join(modelPath, tokenizerFile)
|
|
|
|
|
|
|
|
model := rwkv.LoadFiles(opts.ModelFile, tokenizerPath, uint32(opts.GetThreads()))
|
2023-07-14 23:19:43 +00:00
|
|
|
|
|
|
|
if model == nil {
|
2024-06-24 06:34:36 +00:00
|
|
|
return fmt.Errorf("rwkv could not load model")
|
2023-07-14 23:19:43 +00:00
|
|
|
}
|
|
|
|
llm.rwkv = model
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
|
|
|
|
stopWord := "\n"
|
|
|
|
if len(opts.StopPrompts) > 0 {
|
|
|
|
stopWord = opts.StopPrompts[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
response := llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), nil)
|
|
|
|
|
|
|
|
return response, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
|
|
|
|
go func() {
|
|
|
|
|
|
|
|
stopWord := "\n"
|
|
|
|
if len(opts.StopPrompts) > 0 {
|
|
|
|
stopWord = opts.StopPrompts[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := llm.rwkv.ProcessInput(opts.Prompt); err != nil {
|
|
|
|
fmt.Println("Error processing input: ", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
llm.rwkv.GenerateResponse(int(opts.Tokens), stopWord, float32(opts.Temperature), float32(opts.TopP), func(s string) bool {
|
|
|
|
results <- s
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
close(results)
|
|
|
|
}()
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2023-08-22 16:48:06 +00:00
|
|
|
|
|
|
|
func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
|
|
|
|
tokens, err := llm.rwkv.Tokenizer.Encode(opts.Prompt)
|
|
|
|
if err != nil {
|
|
|
|
return pb.TokenizationResponse{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
l := len(tokens)
|
|
|
|
i32Tokens := make([]int32, l)
|
|
|
|
|
|
|
|
for i, t := range tokens {
|
|
|
|
i32Tokens[i] = int32(t.ID)
|
|
|
|
}
|
|
|
|
|
|
|
|
return pb.TokenizationResponse{
|
|
|
|
Length: int32(l),
|
|
|
|
Tokens: i32Tokens,
|
|
|
|
}, nil
|
|
|
|
}
|