// LocalAI/pkg/grpc/proto/backend.proto

// This schema is written in proto3 syntax.
syntax = "proto3";

// Go import path of the package that holds the generated Go code.
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
// Java codegen: emit one .java file per top-level type instead of nesting
// everything inside the outer class.
option java_multiple_files = true;
// Java package for the generated classes.
option java_package = "io.skynet.localai.backend";
// Name of the generated Java outer (wrapper) class for this file.
option java_outer_classname = "LocalAIBackend";

// Protobuf package; every type below is addressed as backend.<Name>.
package backend;

// Backend is the gRPC service declared by this file. It exposes eight RPCs;
// all are unary except PredictStream, which streams its responses.
// Predict, PredictStream and Embedding share PredictOptions as their request
// type, and LoadModel, GenerateImage and TTS share Result as their response.
service Backend {
  // Health takes an empty HealthMessage and returns a Reply.
  // NOTE(review): presumably a liveness probe; the expected Reply payload is
  // not specified in this file.
  rpc Health(HealthMessage) returns (Reply) {}
  // Predict takes PredictOptions and returns a single Reply.
  rpc Predict(PredictOptions) returns (Reply) {}
  // LoadModel takes ModelOptions and returns a Result (message + success).
  rpc LoadModel(ModelOptions) returns (Result) {}
  // PredictStream takes the same request as Predict but returns a
  // server-side stream of Reply messages.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}
  // Embedding takes PredictOptions and returns an EmbeddingResult
  // (a repeated float field).
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
  // GenerateImage takes a GenerateImageRequest and returns a Result; no image
  // bytes are part of the response type.
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  // AudioTranscription takes a TranscriptRequest and returns a
  // TranscriptResult (segments plus full text).
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  // TTS takes a TTSRequest and returns a Result; no audio bytes are part of
  // the response type.
  rpc TTS(TTSRequest) returns (Result) {}
}

// HealthMessage is the request type of the Health RPC. It declares no fields.
message HealthMessage {}

// PredictOptions is the request message of the Predict, PredictStream and
// Embedding RPCs.
//
// NOTE(review): this file does not define per-field semantics. The grouping
// comments below are inferred from the field names only; confirm them against
// the backend implementations before relying on them.
//
// Field number 24 is not used. NOTE(review): if it belonged to a field that
// was removed, declare `reserved 24;` so the number cannot be reused with a
// different meaning.
message PredictOptions {
  // Presumably the input text for the request.
  string Prompt = 1;
  // Integer generation settings (names suggest RNG seed, thread count, token
  // limit, top-k, repeat window, batch size, tokens to keep) - TODO confirm.
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  // Floating-point sampling settings - TODO confirm ranges and defaults.
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  // Note: a second debug flag, Debug, exists at field 34. This file does not
  // say how the two differ.
  bool DebugMode = 12;
  // Zero or more strings; presumably sequences that end generation.
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  // Mirostat is an int32 while its ETA/TAU companions are floats.
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  // Carried as a string; the expected format is not specified here.
  string LogitBias = 23;
  // Field 24 is intentionally or accidentally skipped (see message comment).
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  // MainGPU and TensorSplit are strings here, as they are in ModelOptions;
  // their expected format is not specified in this file.
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  // See DebugMode (field 12) for the other debug flag.
  bool Debug = 34;
  repeated int32 EmbeddingTokens = 35;
  // A string in this message; ModelOptions has a bool field of the same name.
  string Embeddings = 36;
  float RopeFreqBase = 37;
  float RopeFreqScale = 38;
  float NegativePromptScale = 39;
  string NegativePrompt = 40;
}

// Reply is the response message of the Health and Predict RPCs and the
// element type streamed by PredictStream.
message Reply {
  // Payload as raw bytes. Being `bytes` rather than `string`, it is not
  // required to be valid UTF-8.
  bytes message = 1;
}

// ModelOptions is the request message of the LoadModel RPC.
//
// NOTE(review): this file does not define per-field semantics; any meaning
// beyond the declared type is inferred from the field name and should be
// confirmed against the backend implementations.
message ModelOptions {
  // Presumably the model name or path - TODO confirm; see also ModelFile (21).
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  // A bool in this message; PredictOptions has a string field of the same name.
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  // MainGPU and TensorSplit are strings; their expected format is not
  // specified in this file.
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  string LibrarySearchPath = 16;
  // RopeFreqBase and RopeFreqScale also appear in PredictOptions (37, 38).
  float RopeFreqBase = 17;
  float RopeFreqScale = 18;
  float RMSNormEps = 19;
  int32 NGQA = 20;
  string ModelFile = 21;

  // AutoGPTQ
  // Fields 22-25 follow the AutoGPTQ marker above; presumably only that
  // backend reads them - TODO confirm.
  string Device = 22;
  bool UseTriton = 23;
  string ModelBaseName = 24;
  bool UseFastTokenizer = 25;
}

// Result is the response message of the LoadModel, GenerateImage and TTS
// RPCs: a text message plus a success flag.
message Result {
  // Free-form text accompanying the outcome.
  string message = 1;
  // Outcome flag. As a proto3 bool without `optional`, an unset value reads
  // as false.
  bool success = 2;
}

// EmbeddingResult is the response message of the Embedding RPC.
message EmbeddingResult {
  // Embedding values as 32-bit floats; the length is not fixed by the schema.
  repeated float embeddings = 1;
}

// TranscriptRequest is the request message of the AudioTranscription RPC.
//
// Field numbering starts at 2; field number 1 is not used. NOTE(review): if
// it belonged to a removed field, declare `reserved 1;` so it cannot be
// reused with a different meaning.
message TranscriptRequest {
  // NOTE(review): presumably a file path; this is the only path-like field in
  // the request and no separate audio-input field is visible - confirm how
  // the backend locates the audio.
  string dst = 2;
  // Language of the audio; the expected format (e.g. code vs. name) is not
  // specified here.
  string language = 3;
  // Unsigned here, whereas Threads in PredictOptions and ModelOptions is int32.
  uint32 threads = 4;
}

// TranscriptResult is the response message of the AudioTranscription RPC.
message TranscriptResult {
  // Zero or more transcript segments.
  repeated TranscriptSegment segments = 1;
  // Transcript text; presumably the text of the whole recording - TODO confirm
  // how it relates to the per-segment text.
  string text = 2;
}

// TranscriptSegment is the element type of TranscriptResult.segments.
message TranscriptSegment {
  // Segment identifier.
  int32 id = 1;
  // Segment boundaries as 64-bit integers. NOTE(review): the unit is not
  // stated in this file - confirm before converting to a duration.
  int64 start = 2;
  int64 end = 3;
  // Text of this segment.
  string text = 4;
  // Token ids for this segment; presumably the model's token ids - TODO confirm.
  repeated int32 tokens = 5;
}

// GenerateImageRequest is the request message of the GenerateImage RPC.
// The RPC responds with a Result, which carries no image data.
message GenerateImageRequest {
  // Image dimensions; presumably in pixels - TODO confirm.
  int32 height = 1;
  int32 width = 2;
  // Integer mode selector; the meaning of each value is not defined here.
  int32 mode = 3;
  // NOTE(review): presumably the number of generation steps - confirm.
  int32 step = 4;
  int32 seed = 5;
  // Prompt text, split into a positive and a negative part.
  string positive_prompt = 6;
  string negative_prompt = 7;
  // NOTE(review): presumably the path the image is written to - confirm.
  string dst = 8;
}

// TTSRequest is the request message of the TTS RPC.
// The RPC responds with a Result, which carries no audio data.
message TTSRequest {
  // Text input for the request.
  string text = 1;
  // Model selector; whether this is a name or a path is not specified here.
  string model = 2;
  // NOTE(review): presumably the path the audio is written to - confirm.
  string dst = 3;
}
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`syntax = "proto3";`

			`option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";`
			`option java_multiple_files = true;`
feat: move other backends to grpc This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`option java_package = "io.skynet.localai.backend";`
			`option java_outer_classname = "LocalAIBackend";`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00
feat: move other backends to grpc This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`package backend;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00
feat: move other backends to grpc This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`service Backend {`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`rpc Health(HealthMessage) returns (Reply) {}`
			`rpc Predict(PredictOptions) returns (Reply) {}`
			`rpc LoadModel(ModelOptions) returns (Result) {}`
			`rpc PredictStream(PredictOptions) returns (stream Reply) {}`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`rpc Embedding(PredictOptions) returns (EmbeddingResult) {}`
feat: move other backends to grpc This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`rpc GenerateImage(GenerateImageRequest) returns (Result) {}`
			`rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}`
			`rpc TTS(TTSRequest) returns (Result) {}`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message HealthMessage {}`

			`// The request message containing the user's name.`
			`message PredictOptions {`
			`string Prompt = 1;`
			`int32 Seed = 2;`
			`int32 Threads = 3;`
			`int32 Tokens = 4;`
			`int32 TopK = 5;`
			`int32 Repeat = 6;`
			`int32 Batch = 7;`
			`int32 NKeep = 8;`
			`float Temperature = 9;`
			`float Penalty = 10;`
			`bool F16KV = 11;`
			`bool DebugMode = 12;`
			`repeated string StopPrompts = 13;`
			`bool IgnoreEOS = 14;`
			`float TailFreeSamplingZ = 15;`
			`float TypicalP = 16;`
			`float FrequencyPenalty = 17;`
			`float PresencePenalty = 18;`
			`int32 Mirostat = 19;`
			`float MirostatETA = 20;`
			`float MirostatTAU = 21;`
			`bool PenalizeNL = 22;`
			`string LogitBias = 23;`
			`bool MLock = 25;`
			`bool MMap = 26;`
			`bool PromptCacheAll = 27;`
			`bool PromptCacheRO = 28;`
			`string Grammar = 29;`
			`string MainGPU = 30;`
			`string TensorSplit = 31;`
			`float TopP = 32;`
			`string PromptCachePath = 33;`
			`bool Debug = 34;`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`repeated int32 EmbeddingTokens = 35;`
			`string Embeddings = 36;`
feat: add rope settings and negative prompt, drop grammar backend (#797) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-25 17:05:27 +00:00			`float RopeFreqBase = 37;`
			`float RopeFreqScale = 38;`
			`float NegativePromptScale = 39;`
			`string NegativePrompt = 40;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`// The response message containing the result`
			`message Reply {`
fix: use bytes in gRPC proto instead of strings (#813) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-27 16:41:04 +00:00			`bytes message = 1;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message ModelOptions {`
			`string Model = 1;`
			`int32 ContextSize = 2;`
			`int32 Seed = 3;`
			`int32 NBatch = 4;`
			`bool F16Memory = 5;`
			`bool MLock = 6;`
			`bool MMap = 7;`
			`bool VocabOnly = 8;`
			`bool LowVRAM = 9;`
			`bool Embeddings = 10;`
			`bool NUMA = 11;`
			`int32 NGPULayers = 12;`
			`string MainGPU = 13;`
			`string TensorSplit = 14;`
feat: move gpt4all to a grpc service Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`int32 Threads = 15;`
			`string LibrarySearchPath = 16;`
fix: add rope settings during model load, fix CUDA (#821) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-27 19:56:05 +00:00			`float RopeFreqBase = 17;`
			`float RopeFreqScale = 18;`
feat: add ngqa and RMSNormEps parameters (#860) Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-08-02 22:51:08 +00:00			`float RMSNormEps = 19;`
			`int32 NGQA = 20;`
feat: add initial AutoGPTQ backend implementation 2023-08-07 20:39:10 +00:00			`string ModelFile = 21;`

			`// AutoGPTQ`
			`string Device = 22;`
			`bool UseTriton = 23;`
			`string ModelBaseName = 24;`
feat: Add UseFastTokenizer Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-08-07 23:10:05 +00:00			`bool UseFastTokenizer = 25;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message Result {`
			`string message = 1;`
			`bool success = 2;`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message EmbeddingResult {`
			`repeated float embeddings = 1;`
feat: move other backends to grpc This finally makes everything more consistent Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message TranscriptRequest {`
			`string dst = 2;`
			`string language = 3;`
			`uint32 threads = 4;`
			`}`

			`message TranscriptResult {`
			`repeated TranscriptSegment segments = 1;`
			`string text = 2;`
			`}`

			`message TranscriptSegment {`
			`int32 id = 1;`
			`int64 start = 2;`
			`int64 end = 3;`
			`string text = 4;`
			`repeated int32 tokens = 5;`
			`}`

			`message GenerateImageRequest {`
			`int32 height = 1;`
			`int32 width = 2;`
			`int32 mode = 3;`
			`int32 step = 4;`
			`int32 seed = 5;`
			`string positive_prompt = 6;`
			`string negative_prompt = 7;`
			`string dst = 8;`
			`}`

			`message TTSRequest {`
			`string text = 1;`
			`string model = 2;`
			`string dst = 3;`
			`}`