2023-07-14 23:19:43 +00:00
|
|
|
// This file uses proto3 syntax.
syntax = "proto3";

// All definitions in this file live in the `backend` protobuf package.
package backend;

// Code-generation options. The values are part of the published contract of
// the generated Go and Java code and are kept exactly as they were.
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_package = "io.skynet.localai.backend";
option java_outer_classname = "LocalAIBackend";
|
2023-07-14 23:19:43 +00:00
|
|
|
|
2023-07-14 23:19:43 +00:00
|
|
|
// Backend is the gRPC service declared by this file. Every RPC is unary
// except PredictStream, which is server-streaming.
service Backend {
  // Health takes an empty HealthMessage and returns a single Reply.
  rpc Health(HealthMessage) returns (Reply) {}

  // Predict takes PredictOptions and returns a single Reply.
  rpc Predict(PredictOptions) returns (Reply) {}

  // LoadModel takes ModelOptions and reports the outcome in a Result.
  rpc LoadModel(ModelOptions) returns (Result) {}

  // PredictStream takes the same request type as Predict but returns a
  // stream of Reply messages instead of a single one.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}

  // Embedding takes PredictOptions and returns an EmbeddingResult.
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}

  // GenerateImage takes a GenerateImageRequest and reports the outcome in a
  // Result.
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}

  // AudioTranscription takes a TranscriptRequest and returns a
  // TranscriptResult.
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}

  // TTS takes a TTSRequest and reports the outcome in a Result.
  rpc TTS(TTSRequest) returns (Result) {}
}
|
|
|
|
|
|
|
|
// HealthMessage is the request message of the Backend.Health RPC. It
// currently declares no fields.
message HealthMessage {}
|
|
|
|
|
|
|
|
// PredictOptions is the request message shared by the Predict, PredictStream
// and Embedding RPCs of the Backend service.
//
// All singular scalar fields use proto3 implicit presence, so a receiver
// cannot distinguish an explicitly sent zero value (0, "", false) from a
// field that was not set.
//
// Field names are PascalCase rather than the conventional lower_snake_case.
// They are left as-is because renaming changes generated accessors and JSON
// keys for existing clients.
//
// NOTE(review): field number 24 is not assigned. If a field with that number
// was removed in the past, add `reserved 24;` so it cannot be reused; confirm
// against the file history.
message PredictOptions {
  // Input text. Presumably the prompt passed to the model; confirm against
  // the backend implementations.
  string Prompt = 1;
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  // NOTE(review): both DebugMode (12) and Debug (34) exist in this message;
  // which one backends read is not visible from this file.
  bool DebugMode = 12;
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  // Carried as a string; the expected encoding is not defined in this file.
  string LogitBias = 23;
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  // MainGPU and TensorSplit are carried as strings; their format is not
  // defined in this file.
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  bool Debug = 34;
  repeated int32 EmbeddingTokens = 35;
  // Note that this is a string here, while ModelOptions.Embeddings is a bool.
  string Embeddings = 36;
  float RopeFreqBase = 37;
  float RopeFreqScale = 38;
  float NegativePromptScale = 39;
  string NegativePrompt = 40;
}
|
|
|
|
|
|
|
|
// Reply is the response message of the Health and Predict RPCs, and the
// element type of the stream returned by PredictStream.
message Reply {
  // Response payload as raw bytes. Unlike a `string` field, a `bytes` field
  // is not required to contain valid UTF-8.
  bytes message = 1;
}
|
|
|
|
|
|
|
|
// ModelOptions is the request message of the Backend.LoadModel RPC.
//
// All singular scalar fields use proto3 implicit presence, so a receiver
// cannot distinguish an explicitly sent zero value (0, "", false) from a
// field that was not set.
//
// Field names are PascalCase rather than the conventional lower_snake_case.
// They are left as-is because renaming changes generated accessors and JSON
// keys for existing clients.
message ModelOptions {
  // NOTE(review): both Model (1) and ModelFile (21) exist; how they differ is
  // not visible from this file.
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  // Note that this is a bool here, while PredictOptions.Embeddings is a
  // string.
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  // MainGPU and TensorSplit are carried as strings; their format is not
  // defined in this file.
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  string LibrarySearchPath = 16;
  float RopeFreqBase = 17;
  float RopeFreqScale = 18;
  float RMSNormEps = 19;
  int32 NGQA = 20;
  string ModelFile = 21;

  // AutoGPTQ
  string Device = 22;
  bool UseTriton = 23;
  string ModelBaseName = 24;
  bool UseFastTokenizer = 25;
}
|
|
|
|
|
|
|
|
// Result is the response message of the LoadModel, GenerateImage and TTS
// RPCs of the Backend service.
message Result {
  // Free-form text accompanying the outcome.
  string message = 1;
  // Presumably true when the operation succeeded; confirm against the backend
  // implementations. With proto3 implicit presence an unset field reads as
  // false.
  bool success = 2;
}
|
|
|
|
|
|
|
|
// EmbeddingResult is the response message of the Backend.Embedding RPC.
message EmbeddingResult {
  // Embedding values as 32-bit floats. Repeated scalar numeric fields are
  // packed by default in proto3.
  repeated float embeddings = 1;
}
|
|
|
|
|
|
|
|
// TranscriptRequest is the request message of the Backend.AudioTranscription
// RPC.
//
// NOTE(review): field numbering starts at 2; number 1 is not assigned. If a
// field with that number was removed in the past, add `reserved 1;` so it
// cannot be reused; confirm against the file history.
message TranscriptRequest {
  // NOTE(review): the meaning of `dst` is not defined in this file; the same
  // field name also appears in GenerateImageRequest and TTSRequest.
  string dst = 2;
  string language = 3;
  uint32 threads = 4;
}
|
|
|
|
|
|
|
|
// TranscriptResult is the response message of the Backend.AudioTranscription
// RPC.
message TranscriptResult {
  // Individual transcript segments.
  repeated TranscriptSegment segments = 1;
  // Text for the result as a whole; each segment also carries its own `text`.
  string text = 2;
}
|
|
|
|
|
|
|
|
// TranscriptSegment is the element type of TranscriptResult.segments.
message TranscriptSegment {
  int32 id = 1;
  // Segment boundaries as 64-bit integers. The unit is not specified in this
  // file. TODO: confirm and document it.
  int64 start = 2;
  int64 end = 3;
  // Text of this segment.
  string text = 4;
  // Integer token values associated with this segment.
  repeated int32 tokens = 5;
}
|
|
|
|
|
|
|
|
// GenerateImageRequest is the request message of the Backend.GenerateImage
// RPC.
message GenerateImageRequest {
  // Image dimensions; presumably pixels (confirm against the backends).
  int32 height = 1;
  int32 width = 2;
  // NOTE(review): `mode` is a bare int32; the meaning of its values is not
  // defined in this file.
  int32 mode = 3;
  int32 step = 4;
  int32 seed = 5;
  string positive_prompt = 6;
  string negative_prompt = 7;
  // NOTE(review): the meaning of `dst` is not defined in this file;
  // presumably where the generated image is written (confirm).
  string dst = 8;
}
|
|
|
|
|
|
|
|
// TTSRequest is the request message of the Backend.TTS RPC.
message TTSRequest {
  // Input text.
  string text = 1;
  string model = 2;
  // NOTE(review): the meaning of `dst` is not defined in this file;
  // presumably where the generated audio is written (confirm).
  string dst = 3;
}
|