feat: add bark and AutoGPTQ (#871)

This commit is contained in:
Ettore Di Giacinto 2023-08-08 20:41:49 +02:00 committed by GitHub
commit 5b3fedebfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 1176 additions and 185 deletions

View File

@ -11,15 +11,15 @@ ARG TARGETARCH
ARG TARGETVARIANT
ENV BUILD_TYPE=${BUILD_TYPE}
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py"
ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py,autogptq:/build/extra/grpc/autogptq/autogptq.py,bark:/build/extra/grpc/bark/ttsbark.py"
ARG GO_TAGS="stablediffusion tts"
RUN apt-get update && \
apt-get install -y ca-certificates cmake curl patch pip
# Extras requirements
COPY extra/requirements.txt /build/extra/requirements.txt
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
@ -33,6 +33,14 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
# Extras requirements
COPY extra/requirements.txt /build/extra/requirements.txt
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
pip install auto-gptq;\
fi
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
WORKDIR /build
# OpenBLAS requirements
@ -42,9 +50,6 @@ RUN apt-get install -y libopenblas-dev
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
# Use the variables in subsequent instructions
RUN echo "Target Architecture: $TARGETARCH"
RUN echo "Target Variant: $TARGETVARIANT"
# piper requirements
# Use pre-compiled Piper phonemization library (includes onnxruntime)

View File

@ -335,7 +335,9 @@ protogen-go:
pkg/grpc/proto/backend.proto
protogen-python:
python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/bark/ --grpc_python_out=extra/grpc/bark/ pkg/grpc/proto/backend.proto
## GRPC

View File

@ -26,7 +26,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
model.WithLoadGRPCLLMModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),
model.WithModelFile(modelFile),
model.WithModel(modelFile),
model.WithContext(o.Context),
}

View File

@ -20,7 +20,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
model.WithAssetDir(o.AssetsDestination),
model.WithThreads(uint32(c.Threads)),
model.WithContext(o.Context),
model.WithModelFile(c.ImageGenerationAssets),
model.WithModel(c.ImageGenerationAssets),
}
for k, v := range o.ExternalGRPCBackends {

View File

@ -27,7 +27,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
model.WithLoadGRPCLLMModelOpts(grpcOpts),
model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
model.WithAssetDir(o.AssetsDestination),
model.WithModelFile(modelFile),
model.WithModel(modelFile),
model.WithContext(o.Context),
}

View File

@ -15,23 +15,27 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
b = c.Batch
}
return &pb.ModelOptions{
ContextSize: int32(c.ContextSize),
Seed: int32(c.Seed),
NBatch: int32(b),
NGQA: c.NGQA,
RMSNormEps: c.RMSNormEps,
F16Memory: c.F16,
MLock: c.MMlock,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: c.Embeddings,
LowVRAM: c.LowVRAM,
NGPULayers: int32(c.NGPULayers),
MMap: c.MMap,
MainGPU: c.MainGPU,
Threads: int32(c.Threads),
TensorSplit: c.TensorSplit,
ContextSize: int32(c.ContextSize),
Seed: int32(c.Seed),
NBatch: int32(b),
NGQA: c.NGQA,
ModelBaseName: c.ModelBaseName,
UseFastTokenizer: c.UseFastTokenizer,
Device: c.Device,
UseTriton: c.Triton,
RMSNormEps: c.RMSNormEps,
F16Memory: c.F16,
MLock: c.MMlock,
RopeFreqBase: c.RopeFreqBase,
RopeFreqScale: c.RopeFreqScale,
NUMA: c.NUMA,
Embeddings: c.Embeddings,
LowVRAM: c.LowVRAM,
NGPULayers: int32(c.NGPULayers),
MMap: c.MMap,
MainGPU: c.MainGPU,
Threads: int32(c.Threads),
TensorSplit: c.TensorSplit,
}
}

View File

@ -15,7 +15,7 @@ import (
func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
opts := []model.Option{
model.WithBackendString(model.WhisperBackend),
model.WithModelFile(c.Model),
model.WithModel(c.Model),
model.WithContext(o.Context),
model.WithThreads(uint32(c.Threads)),
model.WithAssetDir(o.AssetsDestination),

View File

@ -28,10 +28,14 @@ func generateUniqueFileName(dir, baseName, ext string) string {
}
}
func ModelTTS(text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
func ModelTTS(backend, text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
opts := []model.Option{
model.WithBackendString(model.PiperBackend),
model.WithModelFile(modelFile),
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(o.Context),
model.WithAssetDir(o.AssetsDestination),
}
@ -56,10 +60,13 @@ func ModelTTS(text, modelFile string, loader *model.ModelLoader, o *options.Opti
fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
filePath := filepath.Join(o.AudioDir, fileName)
modelPath := filepath.Join(o.Loader.ModelPath, modelFile)
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
return "", nil, err
// If the model file is not empty, we pass it joined with the model path
modelPath := ""
if modelFile != "" {
modelPath = filepath.Join(o.Loader.ModelPath, modelFile)
if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
return "", nil, err
}
}
res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{

View File

@ -54,6 +54,12 @@ type Config struct {
RMSNormEps float32 `yaml:"rms_norm_eps"`
NGQA int32 `yaml:"ngqa"`
// AutoGPTQ
ModelBaseName string `yaml:"model_base_name"`
Device string `yaml:"device"`
Triton bool `yaml:"triton"`
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}
type Functions struct {

View File

@ -39,4 +39,6 @@ type PredictionOptions struct {
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
// AutoGPTQ
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`
}

View File

@ -9,8 +9,9 @@ import (
)
type TTSRequest struct {
Model string `json:"model" yaml:"model"`
Input string `json:"input" yaml:"input"`
Model string `json:"model" yaml:"model"`
Input string `json:"input" yaml:"input"`
Backend string `json:"backend" yaml:"backend"`
}
func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
@ -22,7 +23,7 @@ func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
return err
}
filePath, _, err := backend.ModelTTS(input.Input, input.Model, o.Loader, o)
filePath, _, err := backend.ModelTTS(input.Backend, input.Input, input.Model, o.Loader, o)
if err != nil {
return err
}

View File

@ -2,6 +2,7 @@ package openai
import (
"context"
config "github.com/go-skynet/LocalAI/api/config"
"github.com/go-skynet/LocalAI/pkg/grammar"
@ -106,4 +107,9 @@ type OpenAIRequest struct {
Grammar string `json:"grammar" yaml:"grammar"`
JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
Backend string `json:"backend" yaml:"backend"`
// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}

View File

@ -71,10 +71,22 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
config.TopP = input.TopP
}
if input.Backend != "" {
config.Backend = input.Backend
}
if input.ModelBaseName != "" {
config.ModelBaseName = input.ModelBaseName
}
if input.NegativePromptScale != 0 {
config.NegativePromptScale = input.NegativePromptScale
}
if input.UseFastTokenizer {
config.UseFastTokenizer = input.UseFastTokenizer
}
if input.NegativePrompt != "" {
config.NegativePrompt = input.NegativePrompt
}

109
extra/grpc/autogptq/autogptq.py Executable file
View File

@ -0,0 +1,109 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from pathlib import Path
from transformers import AutoTokenizer
from transformers import TextGenerationPipeline
# Length of each sleep in serve()'s keep-alive loop (one day, in seconds).
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that serves an AutoGPTQ-quantized causal language model.

    ``LoadModel`` stores the model and tokenizer on the instance; ``Predict``
    and ``PredictStream`` read them, so ``LoadModel`` must have succeeded
    before either is called.
    """

    def Health(self, request, context):
        """Liveness probe: always replies with the bytes ``OK``."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Load the tokenizer and quantized model named by ``request.Model``.

        Reads ``Model``, ``ModelBaseName``, ``Device``, ``UseTriton`` and
        ``UseFastTokenizer`` from the request. An empty ``Device`` falls back
        to ``cuda:0``.

        Returns:
            A ``Result`` with ``success=True`` on success. Any exception is
            caught and reported as ``success=False`` with the error in
            ``message`` instead of being raised.
        """
        try:
            device = request.Device if request.Device != "" else "cuda:0"

            tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer)

            # NOTE(review): trust_remote_code=True lets the model repository
            # execute its own Python code at load time; only point this
            # backend at trusted models.
            model = AutoGPTQForCausalLM.from_quantized(request.Model,
                                                       model_basename=request.ModelBaseName,
                                                       use_safetensors=True,
                                                       trust_remote_code=True,
                                                       device=device,
                                                       use_triton=request.UseTriton,
                                                       quantize_config=None)

            self.model = model
            self.tokenizer = tokenizer
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def Predict(self, request, context):
        """Generate a completion for ``request.Prompt``.

        Unset (zero) request values fall back to defaults: ``Penalty`` 1.0,
        ``Tokens`` 512, ``TopP`` 0.95. ``Temperature`` is passed through
        unchanged.

        Returns:
            A ``Reply`` whose ``message`` is the UTF-8 encoded generated text
            with the echoed prompt removed.
        """
        penalty = request.Penalty if request.Penalty != 0.0 else 1.0
        tokens = request.Tokens if request.Tokens != 0 else 512
        top_p = request.TopP if request.TopP != 0.0 else 0.95

        pipeline = TextGenerationPipeline(
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=tokens,
            temperature=request.Temperature,
            top_p=top_p,
            repetition_penalty=penalty,
        )
        t = pipeline(request.Prompt)[0]["generated_text"]

        # Remove the prompt from the response if present. Only the first
        # occurrence is dropped so that a completion which legitimately
        # repeats the prompt text is not mangled.
        if request.Prompt in t:
            t = t.replace(request.Prompt, "", 1)

        # The service declares Reply (bytes message) as the response type of
        # Predict, so return that rather than Result (string message).
        return backend_pb2.Reply(message=bytes(t, encoding='utf-8'))

    def PredictStream(self, request, context):
        """Server-streaming variant of ``Predict``.

        Token-by-token streaming is not implemented yet: the whole completion
        is produced by ``Predict`` and emitted as a single stream element.
        """
        # A unary-stream handler must return an iterator of responses, so
        # this has to be a generator rather than returning the message itself.
        yield self.Predict(request, context)
def serve(address):
    """Start the gRPC backend on ``address`` and block until terminated.

    The server runs on a 10-worker thread pool over an insecure port. SIGINT
    and SIGTERM stop the server immediately and exit the process with
    status 0.
    """
    grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), grpc_server)
    grpc_server.add_insecure_port(address)
    grpc_server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    def _shutdown(signum, stack_frame):
        # Stop without a grace period, then leave the process.
        print("Received termination signal. Shutting down...")
        grpc_server.stop(0)
        sys.exit(0)

    # Install the same handler for both termination signals.
    for term_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(term_signal, _shutdown)

    # start() does not block, so keep the main thread alive by sleeping.
    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        grpc_server.stop(0)
if __name__ == "__main__":
    # Command-line entry point: parse the bind address and run the server.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument(
        "--addr",
        default="localhost:50051",
        help="The address to bind the server to.",
    )
    serve(cli.parse_args().addr)

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xe2\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 
\x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
_globals['_HEALTHMESSAGE']._serialized_start=26
_globals['_HEALTHMESSAGE']._serialized_end=41
_globals['_PREDICTOPTIONS']._serialized_start=44
_globals['_PREDICTOPTIONS']._serialized_end=818
_globals['_REPLY']._serialized_start=820
_globals['_REPLY']._serialized_end=844
_globals['_MODELOPTIONS']._serialized_start=847
_globals['_MODELOPTIONS']._serialized_end=1329
_globals['_RESULT']._serialized_start=1331
_globals['_RESULT']._serialized_end=1373
_globals['_EMBEDDINGRESULT']._serialized_start=1375
_globals['_EMBEDDINGRESULT']._serialized_end=1412
_globals['_TRANSCRIPTREQUEST']._serialized_start=1414
_globals['_TRANSCRIPTREQUEST']._serialized_end=1481
_globals['_TRANSCRIPTRESULT']._serialized_start=1483
_globals['_TRANSCRIPTRESULT']._serialized_end=1561
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1563
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1652
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1655
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1813
_globals['_TTSREQUEST']._serialized_start=1815
_globals['_TTSREQUEST']._serialized_end=1869
_globals['_BACKEND']._serialized_start=1872
_globals['_BACKEND']._serialized_end=2363
# @@protoc_insertion_point(module_scope)

View File

@ -0,0 +1,297 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import backend_pb2 as backend__pb2
class BackendStub(object):
"""Missing associated documentation comment in .proto file."""
def __init__(self, channel):
"""Constructor.
Args:
channel: A grpc.Channel.
"""
self.Health = channel.unary_unary(
'/backend.Backend/Health',
request_serializer=backend__pb2.HealthMessage.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.Predict = channel.unary_unary(
'/backend.Backend/Predict',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.LoadModel = channel.unary_unary(
'/backend.Backend/LoadModel',
request_serializer=backend__pb2.ModelOptions.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
self.PredictStream = channel.unary_stream(
'/backend.Backend/PredictStream',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.Reply.FromString,
)
self.Embedding = channel.unary_unary(
'/backend.Backend/Embedding',
request_serializer=backend__pb2.PredictOptions.SerializeToString,
response_deserializer=backend__pb2.EmbeddingResult.FromString,
)
self.GenerateImage = channel.unary_unary(
'/backend.Backend/GenerateImage',
request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
self.AudioTranscription = channel.unary_unary(
'/backend.Backend/AudioTranscription',
request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
response_deserializer=backend__pb2.TranscriptResult.FromString,
)
self.TTS = channel.unary_unary(
'/backend.Backend/TTS',
request_serializer=backend__pb2.TTSRequest.SerializeToString,
response_deserializer=backend__pb2.Result.FromString,
)
class BackendServicer(object):
"""Missing associated documentation comment in .proto file."""
def Health(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Predict(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def LoadModel(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def PredictStream(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def Embedding(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def GenerateImage(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def AudioTranscription(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def TTS(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')
def add_BackendServicer_to_server(servicer, server):
rpc_method_handlers = {
'Health': grpc.unary_unary_rpc_method_handler(
servicer.Health,
request_deserializer=backend__pb2.HealthMessage.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'Predict': grpc.unary_unary_rpc_method_handler(
servicer.Predict,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'LoadModel': grpc.unary_unary_rpc_method_handler(
servicer.LoadModel,
request_deserializer=backend__pb2.ModelOptions.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
'PredictStream': grpc.unary_stream_rpc_method_handler(
servicer.PredictStream,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.Reply.SerializeToString,
),
'Embedding': grpc.unary_unary_rpc_method_handler(
servicer.Embedding,
request_deserializer=backend__pb2.PredictOptions.FromString,
response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
),
'GenerateImage': grpc.unary_unary_rpc_method_handler(
servicer.GenerateImage,
request_deserializer=backend__pb2.GenerateImageRequest.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
'AudioTranscription': grpc.unary_unary_rpc_method_handler(
servicer.AudioTranscription,
request_deserializer=backend__pb2.TranscriptRequest.FromString,
response_serializer=backend__pb2.TranscriptResult.SerializeToString,
),
'TTS': grpc.unary_unary_rpc_method_handler(
servicer.TTS,
request_deserializer=backend__pb2.TTSRequest.FromString,
response_serializer=backend__pb2.Result.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'backend.Backend', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Backend(object):
"""Missing associated documentation comment in .proto file."""
@staticmethod
def Health(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
backend__pb2.HealthMessage.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Predict(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def LoadModel(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
backend__pb2.ModelOptions.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def PredictStream(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.Reply.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def Embedding(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
backend__pb2.PredictOptions.SerializeToString,
backend__pb2.EmbeddingResult.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def GenerateImage(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
backend__pb2.GenerateImageRequest.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def AudioTranscription(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
backend__pb2.TranscriptRequest.SerializeToString,
backend__pb2.TranscriptResult.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
@staticmethod
def TTS(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
backend__pb2.TTSRequest.SerializeToString,
backend__pb2.Result.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

View File

@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: backend.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xe2\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 
\x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
# The module namespace is handed to the two builder calls below. The
# ``_globals['_HEALTHMESSAGE']``-style lookups further down rely on those
# calls having stored the descriptor objects in it.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
# Everything below runs only when the descriptor module reports
# ``_USE_C_DESCRIPTORS`` as False.
if _descriptor._USE_C_DESCRIPTORS == False:
    DESCRIPTOR._options = None
    DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
    # Start/end positions of every message and of the service. They appear to
    # be byte offsets into the serialized file descriptor passed to
    # AddSerializedFile (e.g. HealthMessage spans 26..41), so they must be
    # regenerated together with that literal rather than edited by hand.
    _globals['_HEALTHMESSAGE']._serialized_start=26
    _globals['_HEALTHMESSAGE']._serialized_end=41
    _globals['_PREDICTOPTIONS']._serialized_start=44
    _globals['_PREDICTOPTIONS']._serialized_end=818
    _globals['_REPLY']._serialized_start=820
    _globals['_REPLY']._serialized_end=844
    _globals['_MODELOPTIONS']._serialized_start=847
    _globals['_MODELOPTIONS']._serialized_end=1329
    _globals['_RESULT']._serialized_start=1331
    _globals['_RESULT']._serialized_end=1373
    _globals['_EMBEDDINGRESULT']._serialized_start=1375
    _globals['_EMBEDDINGRESULT']._serialized_end=1412
    _globals['_TRANSCRIPTREQUEST']._serialized_start=1414
    _globals['_TRANSCRIPTREQUEST']._serialized_end=1481
    _globals['_TRANSCRIPTRESULT']._serialized_start=1483
    _globals['_TRANSCRIPTRESULT']._serialized_end=1561
    _globals['_TRANSCRIPTSEGMENT']._serialized_start=1563
    _globals['_TRANSCRIPTSEGMENT']._serialized_end=1652
    _globals['_GENERATEIMAGEREQUEST']._serialized_start=1655
    _globals['_GENERATEIMAGEREQUEST']._serialized_end=1813
    _globals['_TTSREQUEST']._serialized_start=1815
    _globals['_TTSREQUEST']._serialized_end=1869
    _globals['_BACKEND']._serialized_start=1872
    _globals['_BACKEND']._serialized_end=2363
# @@protoc_insertion_point(module_scope)

View File

@ -0,0 +1,297 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import backend_pb2 as backend__pb2
class BackendStub(object):
    """Client-side stub for the ``backend.Backend`` gRPC service.

    ``__init__`` creates one attribute per RPC (Health, Predict, LoadModel,
    PredictStream, Embedding, GenerateImage, AudioTranscription, TTS), each
    being the object returned by the channel for that RPC path.
    """

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        # Each entry pairs the RPC path with the serializer for its request
        # message and the deserializer for its response message.
        self.Health = channel.unary_unary(
                '/backend.Backend/Health',
                request_serializer=backend__pb2.HealthMessage.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.Predict = channel.unary_unary(
                '/backend.Backend/Predict',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.LoadModel = channel.unary_unary(
                '/backend.Backend/LoadModel',
                request_serializer=backend__pb2.ModelOptions.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
        # PredictStream is the only RPC created with unary_stream rather than
        # unary_unary.
        self.PredictStream = channel.unary_stream(
                '/backend.Backend/PredictStream',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.Reply.FromString,
                )
        self.Embedding = channel.unary_unary(
                '/backend.Backend/Embedding',
                request_serializer=backend__pb2.PredictOptions.SerializeToString,
                response_deserializer=backend__pb2.EmbeddingResult.FromString,
                )
        self.GenerateImage = channel.unary_unary(
                '/backend.Backend/GenerateImage',
                request_serializer=backend__pb2.GenerateImageRequest.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
        self.AudioTranscription = channel.unary_unary(
                '/backend.Backend/AudioTranscription',
                request_serializer=backend__pb2.TranscriptRequest.SerializeToString,
                response_deserializer=backend__pb2.TranscriptResult.FromString,
                )
        self.TTS = channel.unary_unary(
                '/backend.Backend/TTS',
                request_serializer=backend__pb2.TTSRequest.SerializeToString,
                response_deserializer=backend__pb2.Result.FromString,
                )
class BackendServicer(object):
    """Server-side base class for the ``backend.Backend`` gRPC service.

    Every handler here is a placeholder: it marks the call as UNIMPLEMENTED
    on ``context`` and raises NotImplementedError. Concrete backends are
    presumably expected to subclass this and override the RPCs they support.
    """

    def Health(self, request, context):
        """Placeholder for the Health RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Predict(self, request, context):
        """Placeholder for the Predict RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def LoadModel(self, request, context):
        """Placeholder for the LoadModel RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PredictStream(self, request, context):
        """Placeholder for the PredictStream RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def Embedding(self, request, context):
        """Placeholder for the Embedding RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GenerateImage(self, request, context):
        """Placeholder for the GenerateImage RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def AudioTranscription(self, request, context):
        """Placeholder for the AudioTranscription RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def TTS(self, request, context):
        """Placeholder for the TTS RPC; reports UNIMPLEMENTED and raises."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')
def add_BackendServicer_to_server(servicer, server):
    """Register ``servicer``'s methods on ``server`` as the 'backend.Backend' service.

    Args:
        servicer: object providing the eight RPC methods referenced below
            (Health, Predict, LoadModel, PredictStream, Embedding,
            GenerateImage, AudioTranscription, TTS).
        server: object exposing ``add_generic_rpc_handlers``.
    """
    # Map each RPC name to a handler that deserializes the request message,
    # invokes the servicer method and serializes the response message.
    rpc_method_handlers = {
            'Health': grpc.unary_unary_rpc_method_handler(
                    servicer.Health,
                    request_deserializer=backend__pb2.HealthMessage.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'Predict': grpc.unary_unary_rpc_method_handler(
                    servicer.Predict,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'LoadModel': grpc.unary_unary_rpc_method_handler(
                    servicer.LoadModel,
                    request_deserializer=backend__pb2.ModelOptions.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
            # The only handler built with the unary_stream variant.
            'PredictStream': grpc.unary_stream_rpc_method_handler(
                    servicer.PredictStream,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.Reply.SerializeToString,
            ),
            'Embedding': grpc.unary_unary_rpc_method_handler(
                    servicer.Embedding,
                    request_deserializer=backend__pb2.PredictOptions.FromString,
                    response_serializer=backend__pb2.EmbeddingResult.SerializeToString,
            ),
            'GenerateImage': grpc.unary_unary_rpc_method_handler(
                    servicer.GenerateImage,
                    request_deserializer=backend__pb2.GenerateImageRequest.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
            'AudioTranscription': grpc.unary_unary_rpc_method_handler(
                    servicer.AudioTranscription,
                    request_deserializer=backend__pb2.TranscriptRequest.FromString,
                    response_serializer=backend__pb2.TranscriptResult.SerializeToString,
            ),
            'TTS': grpc.unary_unary_rpc_method_handler(
                    servicer.TTS,
                    request_deserializer=backend__pb2.TTSRequest.FromString,
                    response_serializer=backend__pb2.Result.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'backend.Backend', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Backend(object):
    """One-shot client helpers for the ``backend.Backend`` gRPC service.

    Each static method forwards its arguments to
    ``grpc.experimental.unary_unary`` (``unary_stream`` for PredictStream)
    together with the RPC path and the matching request serializer and
    response deserializer. Note that the forwarding calls pass ``insecure``
    ahead of ``call_credentials``, which differs from the order in which the
    two appear in the method signatures.
    """

    @staticmethod
    def Health(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/Health' (HealthMessage in, Reply out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health',
            backend__pb2.HealthMessage.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Predict(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/Predict' (PredictOptions in, Reply out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def LoadModel(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/LoadModel' (ModelOptions in, Result out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel',
            backend__pb2.ModelOptions.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PredictStream(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/PredictStream' via unary_stream (PredictOptions in, Reply out)."""
        return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.Reply.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def Embedding(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/Embedding' (PredictOptions in, EmbeddingResult out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding',
            backend__pb2.PredictOptions.SerializeToString,
            backend__pb2.EmbeddingResult.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def GenerateImage(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/GenerateImage' (GenerateImageRequest in, Result out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage',
            backend__pb2.GenerateImageRequest.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def AudioTranscription(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/AudioTranscription' (TranscriptRequest in, TranscriptResult out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription',
            backend__pb2.TranscriptRequest.SerializeToString,
            backend__pb2.TranscriptResult.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def TTS(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Call '/backend.Backend/TTS' (TTSRequest in, Result out)."""
        return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS',
            backend__pb2.TTSRequest.SerializeToString,
            backend__pb2.Result.FromString,
            options, channel_credentials,
            insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
import grpc
from concurrent import futures
import time
import backend_pb2
import backend_pb2_grpc
import argparse
import signal
import sys
import os
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from pathlib import Path
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """gRPC servicer that exposes Bark text-to-speech.

    Only ``Health``, ``LoadModel`` and ``TTS`` are overridden; every other RPC
    keeps the behavior inherited from ``backend_pb2_grpc.BackendServicer``.
    """

    def Health(self, request, context):
        """Liveness probe: always replies with the UTF-8 bytes of "OK"."""
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """Preload the Bark models.

        No field of ``request`` is consulted: ``preload_models()`` is called
        without arguments, so the requested model name has no effect here.

        Returns:
            ``Result(success=True)`` once the models are loaded, or
            ``Result(success=False)`` carrying the error text if loading raised.
        """
        try:
            print("Preparing models, please wait", file=sys.stderr)
            # download and load all models
            preload_models()
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        """Synthesize ``request.text`` and write it as a WAV file to ``request.dst``.

        ``request.model``, when non-empty, is passed to Bark as the
        ``history_prompt``; otherwise Bark is called without one.

        Returns:
            ``Result(success=True)`` after the file has been written, or
            ``Result(success=False)`` with a message describing the failure.
        """
        print(request, file=sys.stderr)
        # Fail fast: without a destination the write below can only fail, and
        # only after the (expensive) audio generation has already run.
        if not request.dst:
            return backend_pb2.Result(success=False, message="TTS request has no destination path (dst)")
        try:
            if request.model != "":
                audio_array = generate_audio(request.text, history_prompt=request.model)
            else:
                audio_array = generate_audio(request.text)
            print("saving to", request.dst, file=sys.stderr)
            # save audio to disk
            write_wav(request.dst, SAMPLE_RATE, audio_array)
            print("saved to", request.dst, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)
def serve(address):
    """Start the gRPC server on ``address`` and block until the process is signalled.

    Args:
        address: "host:port" string handed to ``server.add_insecure_port``.

    The function does not return normally: on SIGINT or SIGTERM the server is
    stopped immediately and the process exits with status 0.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        # Logged to stderr like every other message of this backend.
        print("Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Keep the main thread alive; the gRPC workers run in the thread pool.
    # No KeyboardInterrupt handling is needed: with the custom SIGINT handler
    # installed above, Ctrl-C goes through signal_handler instead.
    while True:
        time.sleep(_ONE_DAY_IN_SECONDS)
if __name__ == "__main__":
    # Script entry point: read the bind address from the command line
    # (falling back to localhost:50051) and run the server on it.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051", help="The address to bind the server to.")
    bind_address = cli.parse_args().addr
    serve(bind_address)

View File

@ -13,7 +13,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xfb\x02\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 
\x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xe2\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\x12\x18\n\x10UseFastTokenizer\x18\x19 \x01(\x08\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 
\x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@ -29,21 +29,21 @@ if _descriptor._USE_C_DESCRIPTORS == False:
_globals['_REPLY']._serialized_start=820
_globals['_REPLY']._serialized_end=844
_globals['_MODELOPTIONS']._serialized_start=847
_globals['_MODELOPTIONS']._serialized_end=1226
_globals['_RESULT']._serialized_start=1228
_globals['_RESULT']._serialized_end=1270
_globals['_EMBEDDINGRESULT']._serialized_start=1272
_globals['_EMBEDDINGRESULT']._serialized_end=1309
_globals['_TRANSCRIPTREQUEST']._serialized_start=1311
_globals['_TRANSCRIPTREQUEST']._serialized_end=1378
_globals['_TRANSCRIPTRESULT']._serialized_start=1380
_globals['_TRANSCRIPTRESULT']._serialized_end=1458
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1460
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1549
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1552
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1710
_globals['_TTSREQUEST']._serialized_start=1712
_globals['_TTSREQUEST']._serialized_end=1766
_globals['_BACKEND']._serialized_start=1769
_globals['_BACKEND']._serialized_end=2260
_globals['_MODELOPTIONS']._serialized_end=1329
_globals['_RESULT']._serialized_start=1331
_globals['_RESULT']._serialized_end=1373
_globals['_EMBEDDINGRESULT']._serialized_start=1375
_globals['_EMBEDDINGRESULT']._serialized_end=1412
_globals['_TRANSCRIPTREQUEST']._serialized_start=1414
_globals['_TRANSCRIPTREQUEST']._serialized_end=1481
_globals['_TRANSCRIPTRESULT']._serialized_start=1483
_globals['_TRANSCRIPTRESULT']._serialized_end=1561
_globals['_TRANSCRIPTSEGMENT']._serialized_start=1563
_globals['_TRANSCRIPTSEGMENT']._serialized_end=1652
_globals['_GENERATEIMAGEREQUEST']._serialized_start=1655
_globals['_GENERATEIMAGEREQUEST']._serialized_end=1813
_globals['_TTSREQUEST']._serialized_start=1815
_globals['_TTSREQUEST']._serialized_end=1869
_globals['_BACKEND']._serialized_start=1872
_globals['_BACKEND']._serialized_end=2363
# @@protoc_insertion_point(module_scope)

View File

@ -18,7 +18,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
def LoadModel(self, request, context):
model_name = request.Model
model_name = os.path.basename(model_name)
try:
self.model = SentenceTransformer(model_name)
except Exception as err:

View File

@ -1,4 +1,6 @@
sentence_transformers
grpcio
google
protobuf
protobuf
torch
git+https://github.com/suno-ai/bark.git

View File

@ -85,7 +85,7 @@ func InstallModelFromGalleryByName(galleries []Gallery, name string, basePath st
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
var model *GalleryModel
for _, m := range models {
if name == m.Name || name == strings.ToLower(m.Name) {
if name == m.Name || m.Name == strings.ToLower(name) {
model = m
}
}

View File

@ -16,7 +16,7 @@ type StableDiffusion struct {
// Load initializes the stable diffusion backend from opts.ModelFile and stores
// the resulting instance on sd. It returns the error reported by
// stablediffusion.New, if any.
func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
	var err error
	// Note: the ModelFile here is a path to a directory containing the model files
	sd.stablediffusion, err = stablediffusion.New(opts.ModelFile)
	return err
}

View File

@ -15,7 +15,7 @@ type Embeddings struct {
}
// Load creates the bert model from opts.ModelFile and stores it on llm.
// It returns the error reported by bert.New, if any.
func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
	model, err := bert.New(opts.ModelFile)
	llm.bert = model
	return err
}

View File

@ -18,7 +18,7 @@ type LLM struct {
}
// Load creates the bloomz model from opts.ModelFile and stores it on llm.
// It returns the error reported by bloomz.New, if any.
func (llm *LLM) Load(opts *pb.ModelOptions) error {
	model, err := bloomz.New(opts.ModelFile)
	llm.bloomz = model
	return err
}

View File

@ -40,7 +40,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
}
model, err := ggllm.New(opts.Model, ggllmOpts...)
model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
llm.falcon = model
return err
}

View File

@ -17,7 +17,7 @@ type LLM struct {
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
model, err := gpt4all.New(opts.Model,
model, err := gpt4all.New(opts.ModelFile,
gpt4all.SetThreads(int(opts.Threads)),
gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
llm.gpt4all = model

View File

@ -71,7 +71,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
}
model, err := llama.New(opts.Model, llamaOpts...)
model, err := llama.New(opts.ModelFile, llamaOpts...)
llm.llama = model
return err
}

View File

@ -20,9 +20,9 @@ type LLM struct {
}
func (llm *LLM) Load(opts *pb.ModelOptions) error {
modelPath := filepath.Dir(opts.Model)
modelFile := filepath.Base(opts.Model)
model := rwkv.LoadFiles(opts.Model, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
modelPath := filepath.Dir(opts.ModelFile)
modelFile := filepath.Base(opts.ModelFile)
model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
if model == nil {
return fmt.Errorf("could not load model")

View File

@ -18,7 +18,7 @@ type Dolly struct {
}
func (llm *Dolly) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewDolly(opts.Model)
model, err := transformers.NewDolly(opts.ModelFile)
llm.dolly = model
return err
}

View File

@ -18,7 +18,7 @@ type Falcon struct {
}
func (llm *Falcon) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewFalcon(opts.Model)
model, err := transformers.NewFalcon(opts.ModelFile)
llm.falcon = model
return err
}

View File

@ -18,7 +18,7 @@ type GPT2 struct {
}
func (llm *GPT2) Load(opts *pb.ModelOptions) error {
model, err := transformers.New(opts.Model)
model, err := transformers.New(opts.ModelFile)
llm.gpt2 = model
return err
}

View File

@ -18,7 +18,7 @@ type GPTJ struct {
}
func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTJ(opts.Model)
model, err := transformers.NewGPTJ(opts.ModelFile)
llm.gptj = model
return err
}

View File

@ -18,7 +18,7 @@ type GPTNeoX struct {
}
func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewGPTNeoX(opts.Model)
model, err := transformers.NewGPTNeoX(opts.ModelFile)
llm.gptneox = model
return err
}

View File

@ -18,7 +18,7 @@ type MPT struct {
}
func (llm *MPT) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewMPT(opts.Model)
model, err := transformers.NewMPT(opts.ModelFile)
llm.mpt = model
return err
}

View File

@ -18,7 +18,7 @@ type Replit struct {
}
func (llm *Replit) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewReplit(opts.Model)
model, err := transformers.NewReplit(opts.ModelFile)
llm.replit = model
return err
}

View File

@ -18,7 +18,7 @@ type Starcoder struct {
}
func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
model, err := transformers.NewStarcoder(opts.Model)
model, err := transformers.NewStarcoder(opts.ModelFile)
llm.starcoder = model
return err
}

View File

@ -483,6 +483,12 @@ type ModelOptions struct {
RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"`
RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"`
NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"`
ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"`
// AutoGPTQ
Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"`
UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"`
ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"`
UseFastTokenizer bool `protobuf:"varint,25,opt,name=UseFastTokenizer,proto3" json:"UseFastTokenizer,omitempty"`
}
func (x *ModelOptions) Reset() {
@ -657,6 +663,41 @@ func (x *ModelOptions) GetNGQA() int32 {
return 0
}
// GetModelFile returns the ModelFile field of the message.
// It is safe to call on a nil receiver, in which case it returns "".
func (x *ModelOptions) GetModelFile() string {
	if x == nil {
		return ""
	}
	return x.ModelFile
}
// GetDevice returns the Device field of the message.
// It is safe to call on a nil receiver, in which case it returns "".
func (x *ModelOptions) GetDevice() string {
	if x == nil {
		return ""
	}
	return x.Device
}
// GetUseTriton returns the UseTriton field of the message.
// It is safe to call on a nil receiver, in which case it returns false.
func (x *ModelOptions) GetUseTriton() bool {
	if x == nil {
		return false
	}
	return x.UseTriton
}
// GetModelBaseName returns the ModelBaseName field of the message.
// It is safe to call on a nil receiver, in which case it returns "".
func (x *ModelOptions) GetModelBaseName() string {
	if x == nil {
		return ""
	}
	return x.ModelBaseName
}
// GetUseFastTokenizer returns the UseFastTokenizer field of the message.
// It is safe to call on a nil receiver, in which case it returns false.
func (x *ModelOptions) GetUseFastTokenizer() bool {
	if x == nil {
		return false
	}
	return x.UseFastTokenizer
}
type Result struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
@ -1207,7 +1248,7 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22,
0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
0x67, 0x65, 0x22, 0xc8, 0x04, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69,
0x67, 0x65, 0x22, 0xee, 0x05, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69,
0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01,
0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e,
0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b,
@ -1243,90 +1284,100 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{
0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53,
0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52,
0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51,
0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x22, 0x3c, 0x0a,
0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61,
0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67,
0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01,
0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45,
0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e,
0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03,
0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b,
0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75,
0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09,
0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67,
0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67,
0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01,
0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54,
0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12,
0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28,
0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e,
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73,
0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18,
0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54,
0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64,
0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52,
0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20,
0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74,
0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06,
0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f,
0x6b, 0x65, 0x6e, 0x73, 0x22, 0xe4, 0x01, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74,
0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a,
0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68,
0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02,
0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d,
0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12,
0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73,
0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28,
0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74,
0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09,
0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74,
0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f,
0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74,
0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74,
0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x22, 0x48, 0x0a, 0x0a, 0x54,
0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78,
0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x14, 0x0a,
0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f,
0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09,
0x52, 0x03, 0x64, 0x73, 0x74, 0x32, 0xeb, 0x03, 0x0a, 0x07, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e,
0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16, 0x2e, 0x62, 0x61,
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x4d, 0x65, 0x73, 0x73,
0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65,
0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74,
0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69,
0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b,
0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c,
0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a,
0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x53, 0x74, 0x72,
0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72,
0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62,
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x30, 0x01,
0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e,
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f,
0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74,
0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d,
0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65,
0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65,
0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73,
0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x72,
0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61,
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74,
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75,
0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54, 0x53, 0x12, 0x13, 0x2e, 0x62, 0x61,
0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74,
0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c,
0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74,
0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67,
0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49,
0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x12, 0x1c, 0x0a,
0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x18, 0x15, 0x20, 0x01, 0x28, 0x09,
0x52, 0x09, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x44,
0x65, 0x76, 0x69, 0x63, 0x65, 0x18, 0x16, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x44, 0x65, 0x76,
0x69, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e,
0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x55, 0x73, 0x65, 0x54, 0x72, 0x69, 0x74, 0x6f,
0x6e, 0x12, 0x24, 0x0a, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42, 0x61, 0x73, 0x65, 0x4e, 0x61,
0x6d, 0x65, 0x18, 0x18, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x42,
0x61, 0x73, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2a, 0x0a, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61,
0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69, 0x7a, 0x65, 0x72, 0x18, 0x19, 0x20, 0x01, 0x28,
0x08, 0x52, 0x10, 0x55, 0x73, 0x65, 0x46, 0x61, 0x73, 0x74, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x69,
0x7a, 0x65, 0x72, 0x22, 0x3c, 0x0a, 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a,
0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07,
0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65,
0x73, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73,
0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65,
0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e,
0x67, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64,
0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74,
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c,
0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c,
0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61,
0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64,
0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52,
0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74,
0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e,
0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d,
0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a,
0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78,
0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53,
0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01,
0x28, 0x05, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18,
0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03,
0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12,
0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65,
0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03,
0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xe4, 0x01, 0x0a, 0x14, 0x47,
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75,
0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20,
0x01, 0x28, 0x05, 0x52, 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77,
0x69, 0x64, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74,
0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52,
0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20,
0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65,
0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a,
0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65,
0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12, 0x27, 0x0a, 0x0f, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69,
0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52,
0x0e, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x12,
0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73,
0x74, 0x22, 0x48, 0x0a, 0x0a, 0x54, 0x54, 0x53, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12,
0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74,
0x65, 0x78, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x02, 0x20, 0x01,
0x28, 0x09, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74,
0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x64, 0x73, 0x74, 0x32, 0xeb, 0x03, 0x0a, 0x07,
0x42, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x12, 0x32, 0x0a, 0x06, 0x48, 0x65, 0x61, 0x6c, 0x74,
0x68, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x48, 0x65, 0x61, 0x6c,
0x74, 0x68, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b,
0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22, 0x00, 0x12, 0x34, 0x0a, 0x07, 0x50,
0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a,
0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x22,
0x00, 0x12, 0x35, 0x0a, 0x09, 0x4c, 0x6f, 0x61, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x15,
0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70,
0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e,
0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x3c, 0x0a, 0x0d, 0x50, 0x72, 0x65, 0x64,
0x69, 0x63, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b,
0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72, 0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f,
0x6e, 0x73, 0x1a, 0x0e, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x52, 0x65, 0x70,
0x6c, 0x79, 0x22, 0x00, 0x30, 0x01, 0x12, 0x40, 0x0a, 0x09, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64,
0x69, 0x6e, 0x67, 0x12, 0x17, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x50, 0x72,
0x65, 0x64, 0x69, 0x63, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x18, 0x2e, 0x62,
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x45, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67,
0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x41, 0x0a, 0x0d, 0x47, 0x65, 0x6e, 0x65,
0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x2e, 0x62, 0x61, 0x63, 0x6b,
0x65, 0x6e, 0x64, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x49, 0x6d, 0x61, 0x67,
0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65,
0x6e, 0x64, 0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x4d, 0x0a, 0x12, 0x41,
0x75, 0x64, 0x69, 0x6f, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f,
0x6e, 0x12, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e,
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e,
0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69,
0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x12, 0x2d, 0x0a, 0x03, 0x54, 0x54,
0x53, 0x12, 0x13, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x54, 0x53, 0x52,
0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0f, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64,
0x2e, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x22, 0x00, 0x42, 0x5a, 0x0a, 0x19, 0x69, 0x6f, 0x2e,
0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x61, 0x69, 0x2e, 0x62,
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x42, 0x0e, 0x4c, 0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x42,
0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x50, 0x01, 0x5a, 0x2b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62,
0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x2d, 0x73, 0x6b, 0x79, 0x6e, 0x65, 0x74, 0x2f, 0x4c,
0x6f, 0x63, 0x61, 0x6c, 0x41, 0x49, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x67, 0x72, 0x70, 0x63, 0x2f,
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View File

@ -89,6 +89,13 @@ message ModelOptions {
float RopeFreqScale = 18;
float RMSNormEps = 19;
int32 NGQA = 20;
string ModelFile = 21;
// AutoGPTQ
string Device = 22;
bool UseTriton = 23;
string ModelBaseName = 24;
bool UseFastTokenizer = 25;
}
message Result {

View File

@ -17,7 +17,7 @@ type Whisper struct {
func (sd *Whisper) Load(opts *pb.ModelOptions) error {
// Note: the Model here is a path to a directory containing the model files
w, err := whisper.New(opts.Model)
w, err := whisper.New(opts.ModelFile)
sd.whisper = w
return err
}

View File

@ -18,8 +18,8 @@ type Piper struct {
}
func (sd *Piper) Load(opts *pb.ModelOptions) error {
if filepath.Ext(opts.Model) != ".onnx" {
return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.Model)
if filepath.Ext(opts.ModelFile) != ".onnx" {
return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.ModelFile)
}
var err error
// Note: the Model here is a path to a directory containing the model files

View File

@ -83,7 +83,9 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
grpcControlProcess := process.New(
process.WithTemporaryStateDir(),
process.WithName(grpcProcess),
process.WithArgs("--addr", serverAddress))
process.WithArgs("--addr", serverAddress),
process.WithEnvironment(os.Environ()...),
)
ml.grpcProcesses[id] = grpcControlProcess
@ -124,8 +126,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
return func(s string) (*grpc.Client, error) {
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
return func(modelName, modelFile string) (*grpc.Client, error) {
log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
var client *grpc.Client
@ -148,7 +150,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
}
// Make sure the process is executable
if err := ml.startProcess(uri, o.modelFile, serverAddress); err != nil {
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
return nil, err
}
@ -172,7 +174,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
}
// Make sure the process is executable
if err := ml.startProcess(grpcProcess, o.modelFile, serverAddress); err != nil {
if err := ml.startProcess(grpcProcess, o.model, serverAddress); err != nil {
return nil, err
}
@ -198,7 +200,8 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
}
options := *o.gRPCOptions
options.Model = s
options.Model = modelName
options.ModelFile = modelFile
log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
@ -217,14 +220,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
o := NewOptions(opts...)
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
log.Debug().Msgf("Loading model %s from %s", o.backendString, o.model)
backend := strings.ToLower(o.backendString)
// if an external backend is provided, use it
_, externalBackendExists := o.externalBackends[backend]
if externalBackendExists {
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
}
switch backend {
@ -232,13 +235,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
MPTBackend, Gpt2Backend, FalconBackend,
GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
return ml.LoadModel(o.model, ml.grpcModel(backend, o))
case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o))
return ml.LoadModel(o.model, ml.grpcModel(Gpt4All, o))
case PiperBackend:
o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
return ml.LoadModel(o.modelFile, ml.grpcModel(PiperBackend, o))
return ml.LoadModel(o.model, ml.grpcModel(PiperBackend, o))
default:
return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
}
@ -249,8 +252,8 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
// Is this really needed? BackendLoader already does this
ml.mu.Lock()
if m := ml.checkIsLoaded(o.modelFile); m != nil {
log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
if m := ml.checkIsLoaded(o.model); m != nil {
log.Debug().Msgf("Model '%s' already loaded", o.model)
ml.mu.Unlock()
return m, nil
}
@ -263,13 +266,13 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
for _, b := range o.externalBackends {
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
}
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.modelFile, strings.Join(allBackendsToAutoLoad, ", "))
log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
for _, b := range allBackendsToAutoLoad {
log.Debug().Msgf("[%s] Attempting to load", b)
options := []Option{
WithBackendString(b),
WithModelFile(o.modelFile),
WithModel(o.model),
WithLoadGRPCLLMModelOpts(o.gRPCOptions),
WithThreads(o.threads),
WithAssetDir(o.assetDir),

View File

@ -98,7 +98,7 @@ func (ml *ModelLoader) ListModels() ([]string, error) {
return models, nil
}
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*grpc.Client, error)) (*grpc.Client, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
@ -111,7 +111,7 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Cl
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := loader(modelFile)
model, err := loader(modelName, modelFile)
if err != nil {
return nil, err
}

View File

@ -8,7 +8,7 @@ import (
type Options struct {
backendString string
modelFile string
model string
threads uint32
assetDir string
context context.Context
@ -35,9 +35,9 @@ func WithBackendString(backend string) Option {
}
}
func WithModelFile(modelFile string) Option {
func WithModel(modelFile string) Option {
return func(o *Options) {
o.modelFile = modelFile
o.model = modelFile
}
}