diff --git a/Makefile b/Makefile
index 26a90bee..4813cd90 100644
--- a/Makefile
+++ b/Makefile
@@ -335,7 +335,8 @@ protogen-go:
 	pkg/grpc/proto/backend.proto
 
 protogen-python:
-	python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
+	python3 -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/autogptq/ --grpc_python_out=extra/grpc/autogptq/ pkg/grpc/proto/backend.proto
 
 ## GRPC
diff --git a/api/backend/embeddings.go b/api/backend/embeddings.go
index 53df785b..c6ebad52 100644
--- a/api/backend/embeddings.go
+++ b/api/backend/embeddings.go
@@ -26,7 +26,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.
 		model.WithLoadGRPCLLMModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
-		model.WithModelFile(modelFile),
+		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 	}
diff --git a/api/backend/image.go b/api/backend/image.go
index 9e32d1db..f3b3a9d7 100644
--- a/api/backend/image.go
+++ b/api/backend/image.go
@@ -20,7 +20,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithContext(o.Context),
-		model.WithModelFile(c.ImageGenerationAssets),
+		model.WithModel(c.ImageGenerationAssets),
 	}
 
 	for k, v := range o.ExternalGRPCBackends {
diff --git a/api/backend/llm.go b/api/backend/llm.go
index 498c9b09..804df582 100644
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@@ -27,7 +27,7 @@ func ModelInference(ctx context.Context, s string, loader *model.ModelLoader, c
 		model.WithLoadGRPCLLMModelOpts(grpcOpts),
 		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
-		model.WithModelFile(modelFile),
+		model.WithModel(modelFile),
 		model.WithContext(o.Context),
 	}
diff --git a/api/backend/options.go b/api/backend/options.go
index 813ecd7d..1de60bef 100644
--- a/api/backend/options.go
+++ b/api/backend/options.go
@@ -19,6 +19,9 @@ func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 		Seed:          int32(c.Seed),
 		NBatch:        int32(b),
 		NGQA:          c.NGQA,
+		ModelBaseName: c.ModelBaseName,
+		Device:        c.Device,
+		UseTriton:     c.Triton,
 		RMSNormEps:    c.RMSNormEps,
 		F16Memory:     c.F16,
 		MLock:         c.MMlock,
diff --git a/api/backend/transcript.go b/api/backend/transcript.go
index b2f25012..add16f4c 100644
--- a/api/backend/transcript.go
+++ b/api/backend/transcript.go
@@ -15,7 +15,7 @@ import (
 func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
 	opts := []model.Option{
 		model.WithBackendString(model.WhisperBackend),
-		model.WithModelFile(c.Model),
+		model.WithModel(c.Model),
 		model.WithContext(o.Context),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
diff --git a/api/config/config.go b/api/config/config.go
index 75782715..b2d7c833 100644
--- a/api/config/config.go
+++ b/api/config/config.go
@@ -54,6 +54,11 @@ type Config struct {
 	RMSNormEps float32 `yaml:"rms_norm_eps"`
 	NGQA       int32   `yaml:"ngqa"`
+
+	// AutoGPTQ
+	ModelBaseName string `yaml:"model_base_name"`
+	Device        string `yaml:"device"`
+	Triton        bool   `yaml:"triton"`
 }
 
 type Functions struct {
diff --git a/api/openai/api.go b/api/openai/api.go
index ac757f47..2e3f154c 100644
--- a/api/openai/api.go
+++ b/api/openai/api.go
@@ -2,6 +2,7 @@ package openai
 import (
 	"context"
 
+	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/pkg/grammar"
@@ -106,4 +107,9 @@ type OpenAIRequest struct {
 	Grammar string `json:"grammar" yaml:"grammar"`
 	JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
+
+	Backend string `json:"backend" yaml:"backend"`
+
+	// AutoGPTQ
+	ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
 }
diff --git a/api/openai/request.go b/api/openai/request.go
index 5aa4d46a..76dcdffd 100644
--- a/api/openai/request.go
+++ b/api/openai/request.go
@@ -71,6 +71,14 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
 		config.TopP = input.TopP
 	}
 
+	if input.Backend != "" {
+		config.Backend = input.Backend
+	}
+
+	if input.ModelBaseName != "" {
+		config.ModelBaseName = input.ModelBaseName
+	}
+
 	if input.NegativePromptScale != 0 {
 		config.NegativePromptScale = input.NegativePromptScale
 	}
diff --git a/extra/grpc/autogptq/autogptq.py b/extra/grpc/autogptq/autogptq.py
new file mode 100755
index 00000000..79f38d3d
--- /dev/null
+++ b/extra/grpc/autogptq/autogptq.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+import grpc
+from concurrent import futures
+import time
+import backend_pb2
+import backend_pb2_grpc
+import argparse
+import signal
+import sys
+import os
+from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+from pathlib import Path
+from transformers import AutoTokenizer
+from transformers import TextGenerationPipeline
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    def Health(self, request, context):
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+    def LoadModel(self, request, context):
+        try:
+            device = "cuda:0"
+            if request.Device != "":
+                device = request.Device
+
+            tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=True)
+
+            model = AutoGPTQForCausalLM.from_quantized(request.Model,
+                                                       model_basename=request.ModelBaseName,
+                                                       use_safetensors=True,
+                                                       trust_remote_code=True,
+                                                       device=device,
+                                                       use_triton=request.UseTriton,
+                                                       quantize_config=None)
+
+            self.model = model
+            self.tokenizer = tokenizer
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def Predict(self, request, context):
+        # Implement Predict RPC: the Predict RPC returns a Reply, and the message
+        # bytes need an explicit encoding
+        pipeline = TextGenerationPipeline(
+            model=self.model,
+            tokenizer=self.tokenizer,
+            max_new_tokens=request.Tokens,
+            temperature=request.Temperature,
+            top_p=request.TopP,
+            repetition_penalty=request.Penalty,
+        )
+        return backend_pb2.Reply(message=bytes(pipeline(request.Prompt)[0]["generated_text"], encoding='utf-8'))
+
+    def PredictStream(self, request, context):
+        # Implement PredictStream RPC
+        #for reply in some_data_generator():
+        #    yield reply
+        # Not implemented yet
+        return self.Predict(request, context)
+
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("Received termination signal. 
Shutting down...") + server.stop(0) + sys.exit(0) + + # Set the signal handlers for SIGINT and SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + try: + while True: + time.sleep(_ONE_DAY_IN_SECONDS) + except KeyboardInterrupt: + server.stop(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run the gRPC server.") + parser.add_argument( + "--addr", default="localhost:50051", help="The address to bind the server to." + ) + args = parser.parse_args() + + serve(args.addr) \ No newline at end of file diff --git a/extra/grpc/autogptq/backend_pb2.py b/extra/grpc/autogptq/backend_pb2.py new file mode 100644 index 00000000..cd6eca67 --- /dev/null +++ b/extra/grpc/autogptq/backend_pb2.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: backend.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xc8\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = 
b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto' + _globals['_HEALTHMESSAGE']._serialized_start=26 + _globals['_HEALTHMESSAGE']._serialized_end=41 + _globals['_PREDICTOPTIONS']._serialized_start=44 + _globals['_PREDICTOPTIONS']._serialized_end=818 + _globals['_REPLY']._serialized_start=820 + _globals['_REPLY']._serialized_end=844 + _globals['_MODELOPTIONS']._serialized_start=847 + _globals['_MODELOPTIONS']._serialized_end=1303 + _globals['_RESULT']._serialized_start=1305 + _globals['_RESULT']._serialized_end=1347 + _globals['_EMBEDDINGRESULT']._serialized_start=1349 + _globals['_EMBEDDINGRESULT']._serialized_end=1386 + _globals['_TRANSCRIPTREQUEST']._serialized_start=1388 + _globals['_TRANSCRIPTREQUEST']._serialized_end=1455 + _globals['_TRANSCRIPTRESULT']._serialized_start=1457 + _globals['_TRANSCRIPTRESULT']._serialized_end=1535 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=1537 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=1626 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=1629 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=1787 + _globals['_TTSREQUEST']._serialized_start=1789 + _globals['_TTSREQUEST']._serialized_end=1843 + _globals['_BACKEND']._serialized_start=1846 + _globals['_BACKEND']._serialized_end=2337 +# @@protoc_insertion_point(module_scope) diff --git a/extra/grpc/autogptq/backend_pb2_grpc.py b/extra/grpc/autogptq/backend_pb2_grpc.py new file mode 100644 index 00000000..301c0729 --- /dev/null +++ b/extra/grpc/autogptq/backend_pb2_grpc.py @@ -0,0 +1,297 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + +import backend_pb2 as backend__pb2 + + +class BackendStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.Health = channel.unary_unary( + '/backend.Backend/Health', + request_serializer=backend__pb2.HealthMessage.SerializeToString, + response_deserializer=backend__pb2.Reply.FromString, + ) + self.Predict = channel.unary_unary( + '/backend.Backend/Predict', + request_serializer=backend__pb2.PredictOptions.SerializeToString, + response_deserializer=backend__pb2.Reply.FromString, + ) + self.LoadModel = channel.unary_unary( + '/backend.Backend/LoadModel', + request_serializer=backend__pb2.ModelOptions.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.PredictStream = channel.unary_stream( + '/backend.Backend/PredictStream', + request_serializer=backend__pb2.PredictOptions.SerializeToString, + response_deserializer=backend__pb2.Reply.FromString, + ) + self.Embedding = channel.unary_unary( + '/backend.Backend/Embedding', + request_serializer=backend__pb2.PredictOptions.SerializeToString, + response_deserializer=backend__pb2.EmbeddingResult.FromString, + ) + self.GenerateImage = channel.unary_unary( + '/backend.Backend/GenerateImage', + request_serializer=backend__pb2.GenerateImageRequest.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + self.AudioTranscription = channel.unary_unary( + '/backend.Backend/AudioTranscription', + request_serializer=backend__pb2.TranscriptRequest.SerializeToString, + response_deserializer=backend__pb2.TranscriptResult.FromString, + ) + self.TTS = channel.unary_unary( + '/backend.Backend/TTS', + request_serializer=backend__pb2.TTSRequest.SerializeToString, + response_deserializer=backend__pb2.Result.FromString, + ) + + +class BackendServicer(object): + """Missing associated documentation comment in .proto file.""" + + def Health(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Predict(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def LoadModel(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def PredictStream(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def Embedding(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GenerateImage(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def AudioTranscription(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise 
NotImplementedError('Method not implemented!') + + def TTS(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_BackendServicer_to_server(servicer, server): + rpc_method_handlers = { + 'Health': grpc.unary_unary_rpc_method_handler( + servicer.Health, + request_deserializer=backend__pb2.HealthMessage.FromString, + response_serializer=backend__pb2.Reply.SerializeToString, + ), + 'Predict': grpc.unary_unary_rpc_method_handler( + servicer.Predict, + request_deserializer=backend__pb2.PredictOptions.FromString, + response_serializer=backend__pb2.Reply.SerializeToString, + ), + 'LoadModel': grpc.unary_unary_rpc_method_handler( + servicer.LoadModel, + request_deserializer=backend__pb2.ModelOptions.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'PredictStream': grpc.unary_stream_rpc_method_handler( + servicer.PredictStream, + request_deserializer=backend__pb2.PredictOptions.FromString, + response_serializer=backend__pb2.Reply.SerializeToString, + ), + 'Embedding': grpc.unary_unary_rpc_method_handler( + servicer.Embedding, + request_deserializer=backend__pb2.PredictOptions.FromString, + response_serializer=backend__pb2.EmbeddingResult.SerializeToString, + ), + 'GenerateImage': grpc.unary_unary_rpc_method_handler( + servicer.GenerateImage, + request_deserializer=backend__pb2.GenerateImageRequest.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + 'AudioTranscription': grpc.unary_unary_rpc_method_handler( + servicer.AudioTranscription, + request_deserializer=backend__pb2.TranscriptRequest.FromString, + response_serializer=backend__pb2.TranscriptResult.SerializeToString, + ), + 'TTS': grpc.unary_unary_rpc_method_handler( + servicer.TTS, + request_deserializer=backend__pb2.TTSRequest.FromString, + response_serializer=backend__pb2.Result.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'backend.Backend', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. 
+class Backend(object): + """Missing associated documentation comment in .proto file.""" + + @staticmethod + def Health(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/Health', + backend__pb2.HealthMessage.SerializeToString, + backend__pb2.Reply.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Predict(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/Predict', + backend__pb2.PredictOptions.SerializeToString, + backend__pb2.Reply.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def LoadModel(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/LoadModel', + backend__pb2.ModelOptions.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def PredictStream(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_stream(request, target, '/backend.Backend/PredictStream', + backend__pb2.PredictOptions.SerializeToString, + backend__pb2.Reply.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def Embedding(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/Embedding', + backend__pb2.PredictOptions.SerializeToString, + backend__pb2.EmbeddingResult.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def GenerateImage(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/GenerateImage', + backend__pb2.GenerateImageRequest.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def AudioTranscription(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/AudioTranscription', + backend__pb2.TranscriptRequest.SerializeToString, + backend__pb2.TranscriptResult.FromString, + options, channel_credentials, + insecure, call_credentials, 
compression, wait_for_ready, timeout, metadata) + + @staticmethod + def TTS(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/backend.Backend/TTS', + backend__pb2.TTSRequest.SerializeToString, + backend__pb2.Result.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/extra/grpc/huggingface/backend_pb2.py b/extra/grpc/huggingface/backend_pb2.py index c062cee8..cd6eca67 100644 --- a/extra/grpc/huggingface/backend_pb2.py +++ b/extra/grpc/huggingface/backend_pb2.py @@ -13,7 +13,7 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xfb\x02\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 \x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\x86\x06\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 
\x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18 \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! \x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\x12\x14\n\x0cRopeFreqBase\x18% \x01(\x02\x12\x15\n\rRopeFreqScale\x18& \x01(\x02\x12\x1b\n\x13NegativePromptScale\x18\' \x01(\x02\x12\x16\n\x0eNegativePrompt\x18( \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\x0c\"\xc8\x03\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\x12\x14\n\x0cRopeFreqBase\x18\x11 \x01(\x02\x12\x15\n\rRopeFreqScale\x18\x12 \x01(\x02\x12\x12\n\nRMSNormEps\x18\x13 \x01(\x02\x12\x0c\n\x04NGQA\x18\x14 \x01(\x05\x12\x11\n\tModelFile\x18\x15 \x01(\t\x12\x0e\n\x06\x44\x65vice\x18\x16 \x01(\t\x12\x11\n\tUseTriton\x18\x17 \x01(\x08\x12\x15\n\rModelBaseName\x18\x18 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 
\x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -29,21 +29,21 @@ if _descriptor._USE_C_DESCRIPTORS == False: _globals['_REPLY']._serialized_start=820 _globals['_REPLY']._serialized_end=844 _globals['_MODELOPTIONS']._serialized_start=847 - _globals['_MODELOPTIONS']._serialized_end=1226 - _globals['_RESULT']._serialized_start=1228 - _globals['_RESULT']._serialized_end=1270 - _globals['_EMBEDDINGRESULT']._serialized_start=1272 - _globals['_EMBEDDINGRESULT']._serialized_end=1309 - _globals['_TRANSCRIPTREQUEST']._serialized_start=1311 - _globals['_TRANSCRIPTREQUEST']._serialized_end=1378 - _globals['_TRANSCRIPTRESULT']._serialized_start=1380 - _globals['_TRANSCRIPTRESULT']._serialized_end=1458 - _globals['_TRANSCRIPTSEGMENT']._serialized_start=1460 - _globals['_TRANSCRIPTSEGMENT']._serialized_end=1549 - _globals['_GENERATEIMAGEREQUEST']._serialized_start=1552 - _globals['_GENERATEIMAGEREQUEST']._serialized_end=1710 - _globals['_TTSREQUEST']._serialized_start=1712 - _globals['_TTSREQUEST']._serialized_end=1766 - _globals['_BACKEND']._serialized_start=1769 - _globals['_BACKEND']._serialized_end=2260 + _globals['_MODELOPTIONS']._serialized_end=1303 + _globals['_RESULT']._serialized_start=1305 + _globals['_RESULT']._serialized_end=1347 + _globals['_EMBEDDINGRESULT']._serialized_start=1349 + _globals['_EMBEDDINGRESULT']._serialized_end=1386 + _globals['_TRANSCRIPTREQUEST']._serialized_start=1388 + _globals['_TRANSCRIPTREQUEST']._serialized_end=1455 + _globals['_TRANSCRIPTRESULT']._serialized_start=1457 + _globals['_TRANSCRIPTRESULT']._serialized_end=1535 + _globals['_TRANSCRIPTSEGMENT']._serialized_start=1537 + _globals['_TRANSCRIPTSEGMENT']._serialized_end=1626 + _globals['_GENERATEIMAGEREQUEST']._serialized_start=1629 + _globals['_GENERATEIMAGEREQUEST']._serialized_end=1787 + _globals['_TTSREQUEST']._serialized_start=1789 + _globals['_TTSREQUEST']._serialized_end=1843 + _globals['_BACKEND']._serialized_start=1846 + _globals['_BACKEND']._serialized_end=2337 # @@protoc_insertion_point(module_scope) diff --git a/extra/grpc/huggingface/huggingface.py b/extra/grpc/huggingface/huggingface.py index 04776930..7589dfd6 100755 --- a/extra/grpc/huggingface/huggingface.py +++ b/extra/grpc/huggingface/huggingface.py @@ -18,7 +18,6 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): return backend_pb2.Reply(message=bytes("OK", 'utf-8')) def LoadModel(self, request, context): model_name = request.Model - model_name = os.path.basename(model_name) try: self.model = SentenceTransformer(model_name) except Exception as err: diff --git a/extra/requirements.txt b/extra/requirements.txt index 9744afb1..ea73dafe 100644 --- a/extra/requirements.txt +++ 
b/extra/requirements.txt
@@ -1,4 +1,6 @@
 sentence_transformers
 grpcio
 google
-protobuf
\ No newline at end of file
+protobuf
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/pkg/grpc/image/stablediffusion.go b/pkg/grpc/image/stablediffusion.go
index ce0275e9..600f2007 100644
--- a/pkg/grpc/image/stablediffusion.go
+++ b/pkg/grpc/image/stablediffusion.go
@@ -16,7 +16,7 @@ type StableDiffusion struct {
 func (sd *StableDiffusion) Load(opts *pb.ModelOptions) error {
 	var err error
 	// Note: the Model here is a path to a directory containing the model files
-	sd.stablediffusion, err = stablediffusion.New(opts.Model)
+	sd.stablediffusion, err = stablediffusion.New(opts.ModelFile)
 	return err
 }
diff --git a/pkg/grpc/llm/bert/bert.go b/pkg/grpc/llm/bert/bert.go
index 7692797e..b7df5d76 100644
--- a/pkg/grpc/llm/bert/bert.go
+++ b/pkg/grpc/llm/bert/bert.go
@@ -15,7 +15,7 @@ type Embeddings struct {
 }
 
 func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
-	model, err := bert.New(opts.Model)
+	model, err := bert.New(opts.ModelFile)
 	llm.bert = model
 	return err
 }
diff --git a/pkg/grpc/llm/bloomz/bloomz.go b/pkg/grpc/llm/bloomz/bloomz.go
index daa22640..fecaa5b7 100644
--- a/pkg/grpc/llm/bloomz/bloomz.go
+++ b/pkg/grpc/llm/bloomz/bloomz.go
@@ -18,7 +18,7 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	model, err := bloomz.New(opts.Model)
+	model, err := bloomz.New(opts.ModelFile)
 	llm.bloomz = model
 	return err
 }
diff --git a/pkg/grpc/llm/falcon/falcon.go b/pkg/grpc/llm/falcon/falcon.go
index 3c0f84ed..d287ef6d 100644
--- a/pkg/grpc/llm/falcon/falcon.go
+++ b/pkg/grpc/llm/falcon/falcon.go
@@ -40,7 +40,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 		ggllmOpts = append(ggllmOpts, ggllm.SetNBatch(512))
 	}
 
-	model, err := ggllm.New(opts.Model, ggllmOpts...)
+	model, err := ggllm.New(opts.ModelFile, ggllmOpts...)
 	llm.falcon = model
 	return err
 }
diff --git a/pkg/grpc/llm/gpt4all/gpt4all.go b/pkg/grpc/llm/gpt4all/gpt4all.go
index e17afc1e..f94e3309 100644
--- a/pkg/grpc/llm/gpt4all/gpt4all.go
+++ b/pkg/grpc/llm/gpt4all/gpt4all.go
@@ -17,7 +17,7 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	model, err := gpt4all.New(opts.Model,
+	model, err := gpt4all.New(opts.ModelFile,
 		gpt4all.SetThreads(int(opts.Threads)),
 		gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))
 	llm.gpt4all = model
diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go
index 79e55048..da46c9f5 100644
--- a/pkg/grpc/llm/llama/llama.go
+++ b/pkg/grpc/llm/llama/llama.go
@@ -71,7 +71,7 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 		llamaOpts = append(llamaOpts, llama.EnabelLowVRAM)
 	}
 
-	model, err := llama.New(opts.Model, llamaOpts...)
+	model, err := llama.New(opts.ModelFile, llamaOpts...)
 	llm.llama = model
 	return err
 }
diff --git a/pkg/grpc/llm/rwkv/rwkv.go b/pkg/grpc/llm/rwkv/rwkv.go
index f54c14bd..cfd60f0c 100644
--- a/pkg/grpc/llm/rwkv/rwkv.go
+++ b/pkg/grpc/llm/rwkv/rwkv.go
@@ -20,9 +20,9 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	modelPath := filepath.Dir(opts.Model)
-	modelFile := filepath.Base(opts.Model)
-	model := rwkv.LoadFiles(opts.Model, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
+	modelPath := filepath.Dir(opts.ModelFile)
+	modelFile := filepath.Base(opts.ModelFile)
+	model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))
 
 	if model == nil {
 		return fmt.Errorf("could not load model")
diff --git a/pkg/grpc/llm/transformers/dolly.go b/pkg/grpc/llm/transformers/dolly.go
index d5f30938..6c9e0a5d 100644
--- a/pkg/grpc/llm/transformers/dolly.go
+++ b/pkg/grpc/llm/transformers/dolly.go
@@ -18,7 +18,7 @@ type Dolly struct {
 }
 
 func (llm *Dolly) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewDolly(opts.Model)
+	model, err := transformers.NewDolly(opts.ModelFile)
 	llm.dolly = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/falcon.go b/pkg/grpc/llm/transformers/falcon.go
index 982e43e0..54e9b320 100644
--- a/pkg/grpc/llm/transformers/falcon.go
+++ b/pkg/grpc/llm/transformers/falcon.go
@@ -18,7 +18,7 @@ type Falcon struct {
 }
 
 func (llm *Falcon) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewFalcon(opts.Model)
+	model, err := transformers.NewFalcon(opts.ModelFile)
 	llm.falcon = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/gpt2.go b/pkg/grpc/llm/transformers/gpt2.go
index 85a41125..66517d7c 100644
--- a/pkg/grpc/llm/transformers/gpt2.go
+++ b/pkg/grpc/llm/transformers/gpt2.go
@@ -18,7 +18,7 @@ type GPT2 struct {
 }
 
 func (llm *GPT2) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.New(opts.Model)
+	model, err := transformers.New(opts.ModelFile)
 	llm.gpt2 = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/gptj.go b/pkg/grpc/llm/transformers/gptj.go
index e2bc3bf1..6f692188 100644
--- a/pkg/grpc/llm/transformers/gptj.go
+++ b/pkg/grpc/llm/transformers/gptj.go
@@ -18,7 +18,7 @@ type GPTJ struct {
 }
 
 func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTJ(opts.Model)
+	model, err := transformers.NewGPTJ(opts.ModelFile)
 	llm.gptj = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/gptneox.go b/pkg/grpc/llm/transformers/gptneox.go
index ca6db941..bcff0abe 100644
--- a/pkg/grpc/llm/transformers/gptneox.go
+++ b/pkg/grpc/llm/transformers/gptneox.go
@@ -18,7 +18,7 @@ type GPTNeoX struct {
 }
 
 func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewGPTNeoX(opts.Model)
+	model, err := transformers.NewGPTNeoX(opts.ModelFile)
 	llm.gptneox = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/mpt.go b/pkg/grpc/llm/transformers/mpt.go
index d2b9ff1f..8adda361 100644
--- a/pkg/grpc/llm/transformers/mpt.go
+++ b/pkg/grpc/llm/transformers/mpt.go
@@ -18,7 +18,7 @@ type MPT struct {
 }
 
 func (llm *MPT) Load(opts *pb.ModelOptions) error {
-	model, err := transformers.NewMPT(opts.Model)
+	model, err := transformers.NewMPT(opts.ModelFile)
 	llm.mpt = model
 	return err
 }
diff --git a/pkg/grpc/llm/transformers/replit.go b/pkg/grpc/llm/transformers/replit.go
index 4b26ffd8..bfc3a90a 100644
--- a/pkg/grpc/llm/transformers/replit.go
+++ b/pkg/grpc/llm/transformers/replit.go
@@ -18,7 +18,7 @@ type Replit struct {
 }
 
 func 
(llm *Replit) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewReplit(opts.Model) + model, err := transformers.NewReplit(opts.ModelFile) llm.replit = model return err } diff --git a/pkg/grpc/llm/transformers/starcoder.go b/pkg/grpc/llm/transformers/starcoder.go index 7631274e..06948cb3 100644 --- a/pkg/grpc/llm/transformers/starcoder.go +++ b/pkg/grpc/llm/transformers/starcoder.go @@ -18,7 +18,7 @@ type Starcoder struct { } func (llm *Starcoder) Load(opts *pb.ModelOptions) error { - model, err := transformers.NewStarcoder(opts.Model) + model, err := transformers.NewStarcoder(opts.ModelFile) llm.starcoder = model return err } diff --git a/pkg/grpc/proto/backend.pb.go b/pkg/grpc/proto/backend.pb.go index 3a58b349..4d3bd846 100644 --- a/pkg/grpc/proto/backend.pb.go +++ b/pkg/grpc/proto/backend.pb.go @@ -483,6 +483,11 @@ type ModelOptions struct { RopeFreqScale float32 `protobuf:"fixed32,18,opt,name=RopeFreqScale,proto3" json:"RopeFreqScale,omitempty"` RMSNormEps float32 `protobuf:"fixed32,19,opt,name=RMSNormEps,proto3" json:"RMSNormEps,omitempty"` NGQA int32 `protobuf:"varint,20,opt,name=NGQA,proto3" json:"NGQA,omitempty"` + ModelFile string `protobuf:"bytes,21,opt,name=ModelFile,proto3" json:"ModelFile,omitempty"` + // AutoGPTQ + Device string `protobuf:"bytes,22,opt,name=Device,proto3" json:"Device,omitempty"` + UseTriton bool `protobuf:"varint,23,opt,name=UseTriton,proto3" json:"UseTriton,omitempty"` + ModelBaseName string `protobuf:"bytes,24,opt,name=ModelBaseName,proto3" json:"ModelBaseName,omitempty"` } func (x *ModelOptions) Reset() { @@ -657,6 +662,34 @@ func (x *ModelOptions) GetNGQA() int32 { return 0 } +func (x *ModelOptions) GetModelFile() string { + if x != nil { + return x.ModelFile + } + return "" +} + +func (x *ModelOptions) GetDevice() string { + if x != nil { + return x.Device + } + return "" +} + +func (x *ModelOptions) GetUseTriton() bool { + if x != nil { + return x.UseTriton + } + return false +} + +func (x *ModelOptions) GetModelBaseName() string { + if x != nil { + return x.ModelBaseName + } + return "" +} + type Result struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1207,7 +1240,7 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{ 0x0e, 0x4e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x22, 0x21, 0x0a, 0x05, 0x52, 0x65, 0x70, 0x6c, 0x79, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x22, 0xc8, 0x04, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, + 0x67, 0x65, 0x22, 0xc2, 0x05, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, @@ -1243,90 +1276,98 @@ var file_pkg_grpc_proto_backend_proto_rawDesc = []byte{ 0x46, 0x72, 0x65, 0x71, 0x53, 0x63, 0x61, 0x6c, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x02, 0x52, 0x0a, 0x52, 0x4d, 0x53, 0x4e, 0x6f, 0x72, 0x6d, 0x45, 0x70, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x4e, 0x47, 0x51, - 0x41, 0x18, 0x14, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x4e, 0x47, 0x51, 0x41, 0x22, 0x3c, 0x0a, - 0x06, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x6d, 
0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0x31, 0x0a, 0x0f, 0x45, - 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, 0x1e, - 0x0a, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x02, 0x52, 0x0a, 0x65, 0x6d, 0x62, 0x65, 0x64, 0x64, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x5b, - 0x0a, 0x11, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x64, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x64, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, - 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, - 0x65, 0x12, 0x18, 0x0a, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, - 0x28, 0x0d, 0x52, 0x07, 0x74, 0x68, 0x72, 0x65, 0x61, 0x64, 0x73, 0x22, 0x5e, 0x0a, 0x10, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x12, - 0x36, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x1a, 0x2e, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, - 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x08, 0x73, - 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x22, 0x77, 0x0a, 0x11, 0x54, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, - 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, - 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, - 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x05, 0x52, 0x06, 0x74, 0x6f, - 0x6b, 0x65, 0x6e, 0x73, 0x22, 0xe4, 0x01, 0x0a, 0x14, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, - 0x65, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, - 0x06, 0x68, 0x65, 0x69, 0x67, 0x68, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x68, - 0x65, 0x69, 0x67, 0x68, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x77, 0x69, 0x64, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6d, - 0x6f, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x12, - 0x12, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x73, - 0x74, 0x65, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x65, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x04, 0x73, 0x65, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x6f, 0x73, 0x69, 0x74, - 0x69, 0x76, 0x65, 0x5f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0e, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 
	[… remainder of the regenerated protobuf descriptor bytes in pkg/grpc/proto/backend.pb.go omitted; the added entries encode the new ModelFile, Device, UseTriton and ModelBaseName fields shown in the backend.proto diff below …]
 }
 
 var (
diff --git a/pkg/grpc/proto/backend.proto b/pkg/grpc/proto/backend.proto
index c17a7644..fb8f623c 100644
--- a/pkg/grpc/proto/backend.proto
+++ b/pkg/grpc/proto/backend.proto
@@ -89,6 +89,12 @@ message ModelOptions {
   float RopeFreqScale = 18;
   float RMSNormEps = 19;
   int32 NGQA = 20;
+  string ModelFile = 21;
+
+  // AutoGPTQ
+  string Device = 22;
+  bool UseTriton = 23;
+  string ModelBaseName = 24;
 }
 
 message Result {
diff --git a/pkg/grpc/transcribe/whisper.go b/pkg/grpc/transcribe/whisper.go
index c0120dbd..3d25b050 100644
--- a/pkg/grpc/transcribe/whisper.go
+++ b/pkg/grpc/transcribe/whisper.go
@@ -17,7 +17,7 @@ type Whisper struct {
 }
 func (sd *Whisper) Load(opts *pb.ModelOptions) error {
 	// Note: the Model here is a path to a directory containing the model files
-	w, err := whisper.New(opts.Model)
+	w, err := whisper.New(opts.ModelFile)
 	sd.whisper = w
 	return err
 }
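With the descriptor regenerated, a gRPC backend can tell the requested model name apart from the resolved file path, and the AutoGPTQ-oriented settings travel over the same ModelOptions message. The following is a minimal, hypothetical Go sketch (the exampleBackend type and its fields are not part of this change) showing which new pb.ModelOptions field carries which piece of information:

package example

import (
    "fmt"

    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// exampleBackend is hypothetical; it only demonstrates how a backend could
// read the fields added to ModelOptions in this change.
type exampleBackend struct {
    device        string
    useTriton     bool
    modelBaseName string
}

func (b *exampleBackend) Load(opts *pb.ModelOptions) error {
    // opts.Model keeps the name the caller asked for, while opts.ModelFile
    // carries the path resolved under the model directory (what Whisper and
    // Piper now read instead of opts.Model).
    fmt.Printf("loading %s from %s\n", opts.Model, opts.ModelFile)

    // AutoGPTQ-oriented settings introduced by this change.
    b.device = opts.Device // e.g. "cuda:0" (illustrative value)
    b.useTriton = opts.UseTriton
    b.modelBaseName = opts.ModelBaseName
    return nil
}

The sketch is only illustrative; it is not the AutoGPTQ backend itself, it just mirrors the fields such a backend receives.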
diff --git a/pkg/grpc/tts/piper.go b/pkg/grpc/tts/piper.go
index 3bc85e0d..e5e99fc8 100644
--- a/pkg/grpc/tts/piper.go
+++ b/pkg/grpc/tts/piper.go
@@ -18,8 +18,8 @@ type Piper struct {
 }
 
 func (sd *Piper) Load(opts *pb.ModelOptions) error {
-	if filepath.Ext(opts.Model) != ".onnx" {
-		return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.Model)
+	if filepath.Ext(opts.ModelFile) != ".onnx" {
+		return fmt.Errorf("unsupported model type %s (should end with .onnx)", opts.ModelFile)
 	}
 	var err error
 	// Note: the Model here is a path to a directory containing the model files
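Because opts.ModelFile is now the resolved on-disk path, extension guards such as Piper's .onnx check validate the actual file rather than whatever name the caller supplied. A small sketch of that guard as a standalone helper (checkExtension is hypothetical, not part of this change):

package example

import (
    "fmt"
    "path/filepath"
)

// checkExtension mirrors Piper's guard: it rejects a resolved model path
// whose extension does not match the expected one.
func checkExtension(modelFile, want string) error {
    if filepath.Ext(modelFile) != want {
        return fmt.Errorf("unsupported model type %s (should end with %s)", modelFile, want)
    }
    return nil
}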
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 75644b28..46475f97 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -124,8 +124,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
 
 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
-func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc.Client, error) {
-	return func(s string) (*grpc.Client, error) {
+func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*grpc.Client, error) {
+	return func(modelName, modelFile string) (*grpc.Client, error) {
 		log.Debug().Msgf("Loading GRPC Model %s: %+v", backend, *o)
 
 		var client *grpc.Client
@@ -148,7 +148,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
 				return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
 			}
 			// Make sure the process is executable
-			if err := ml.startProcess(uri, o.modelFile, serverAddress); err != nil {
+			if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
 				return nil, err
 			}
 
@@ -172,7 +172,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
 			}
 
 			// Make sure the process is executable
-			if err := ml.startProcess(grpcProcess, o.modelFile, serverAddress); err != nil {
+			if err := ml.startProcess(grpcProcess, o.model, serverAddress); err != nil {
 				return nil, err
 			}
 
@@ -198,7 +198,8 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
 		}
 
 		options := *o.gRPCOptions
-		options.Model = s
+		options.Model = modelName
+		options.ModelFile = modelFile
 
 		log.Debug().Msgf("GRPC: Loading model with options: %+v", options)
 
@@ -217,14 +218,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string) (*grpc
 func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err error) {
 	o := NewOptions(opts...)
 
-	log.Debug().Msgf("Loading model %s from %s", o.backendString, o.modelFile)
+	log.Debug().Msgf("Loading model %s from %s", o.backendString, o.model)
 
 	backend := strings.ToLower(o.backendString)
 
 	// if an external backend is provided, use it
 	_, externalBackendExists := o.externalBackends[backend]
 	if externalBackendExists {
-		return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
+		return ml.LoadModel(o.model, ml.grpcModel(backend, o))
 	}
 
 	switch backend {
@@ -232,13 +233,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
 		MPTBackend, Gpt2Backend, FalconBackend, GPTNeoXBackend, ReplitBackend,
 		StarcoderBackend, BloomzBackend, RwkvBackend, LCHuggingFaceBackend,
 		BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend:
-		return ml.LoadModel(o.modelFile, ml.grpcModel(backend, o))
+		return ml.LoadModel(o.model, ml.grpcModel(backend, o))
 	case Gpt4AllLlamaBackend, Gpt4AllMptBackend, Gpt4AllJBackend, Gpt4All:
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "gpt4all")
-		return ml.LoadModel(o.modelFile, ml.grpcModel(Gpt4All, o))
+		return ml.LoadModel(o.model, ml.grpcModel(Gpt4All, o))
 	case PiperBackend:
 		o.gRPCOptions.LibrarySearchPath = filepath.Join(o.assetDir, "backend-assets", "espeak-ng-data")
-		return ml.LoadModel(o.modelFile, ml.grpcModel(PiperBackend, o))
+		return ml.LoadModel(o.model, ml.grpcModel(PiperBackend, o))
 	default:
 		return nil, fmt.Errorf("backend unsupported: %s", o.backendString)
 	}
@@ -249,8 +250,8 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
 
 	// Is this really needed? BackendLoader already does this
 	ml.mu.Lock()
-	if m := ml.checkIsLoaded(o.modelFile); m != nil {
-		log.Debug().Msgf("Model '%s' already loaded", o.modelFile)
+	if m := ml.checkIsLoaded(o.model); m != nil {
+		log.Debug().Msgf("Model '%s' already loaded", o.model)
 		ml.mu.Unlock()
 		return m, nil
 	}
@@ -263,13 +264,13 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
-	log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.modelFile, strings.Join(allBackendsToAutoLoad, ", "))
+	log.Debug().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
 
 	for _, b := range allBackendsToAutoLoad {
 		log.Debug().Msgf("[%s] Attempting to load", b)
 		options := []Option{
 			WithBackendString(b),
-			WithModelFile(o.modelFile),
+			WithModel(o.model),
 			WithLoadGRPCLLMModelOpts(o.gRPCOptions),
 			WithThreads(o.threads),
 			WithAssetDir(o.assetDir),
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index a4d86dbc..b45a52c8 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -98,7 +98,7 @@ func (ml *ModelLoader) ListModels() ([]string, error) {
 	return models, nil
 }
 
-func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Client, error)) (*grpc.Client, error) {
+func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*grpc.Client, error)) (*grpc.Client, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 
@@ -111,7 +111,7 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (*grpc.Cl
 	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
 
-	model, err := loader(modelFile)
+	model, err := loader(modelName, modelFile)
 	if err != nil {
 		return nil, err
 	}
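LoadModel now resolves the on-disk path once and hands both the name and the file to the loader, so every backend sees the same pair without rebuilding it. A minimal sketch of the new two-argument contract (loadWithLogging and the error text are illustrative; the real loader is the closure returned by ml.grpcModel):

package example

import (
    "fmt"

    "github.com/go-skynet/LocalAI/pkg/grpc"
    "github.com/go-skynet/LocalAI/pkg/model"
)

// loadWithLogging is a hypothetical caller of the new LoadModel signature.
func loadWithLogging(ml *model.ModelLoader, name string) (*grpc.Client, error) {
    return ml.LoadModel(name, func(modelName, modelFile string) (*grpc.Client, error) {
        // modelName is the name handed to LoadModel (it becomes options.Model);
        // modelFile is filepath.Join(ml.ModelPath, modelName) (it becomes options.ModelFile).
        fmt.Printf("loading %s from %s\n", modelName, modelFile)
        return nil, fmt.Errorf("sketch only: no backend is actually started")
    })
}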
diff --git a/pkg/model/options.go b/pkg/model/options.go
index 466e9c2f..646a5223 100644
--- a/pkg/model/options.go
+++ b/pkg/model/options.go
@@ -8,7 +8,7 @@ import (
 
 type Options struct {
 	backendString string
-	modelFile     string
+	model         string
 	threads       uint32
 	assetDir      string
 	context       context.Context
@@ -35,9 +35,9 @@ func WithBackendString(backend string) Option {
 	}
 }
 
-func WithModelFile(modelFile string) Option {
+func WithModel(modelFile string) Option {
 	return func(o *Options) {
-		o.modelFile = modelFile
+		o.model = modelFile
 	}
 }
 
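At call sites, the renamed option reads as follows; a minimal sketch assuming an already-constructed ModelLoader (the model file name and thread count are placeholder values, not part of this change):

package example

import (
    "github.com/go-skynet/LocalAI/pkg/grpc"
    "github.com/go-skynet/LocalAI/pkg/model"
)

// loadWhisper shows WithModel in place of the old WithModelFile.
func loadWhisper(ml *model.ModelLoader) (*grpc.Client, error) {
    return ml.BackendLoader(
        model.WithBackendString(model.WhisperBackend),
        model.WithModel("ggml-whisper-base.bin"), // placeholder file name
        model.WithThreads(4),                     // placeholder thread count
    )
}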