mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
Add tensor_parallel_size option to the vLLM backend settings (#2085)
Signed-off-by: Taikono-Himazin <kazu@po.harenet.ne.jp>
This commit is contained in:
parent
b319ed58b0
commit
03adc1f60d
@ -177,6 +177,7 @@ message ModelOptions {
|
||||
bool EnforceEager = 52;
|
||||
int32 SwapSpace = 53;
|
||||
int32 MaxModelLen = 54;
|
||||
int32 TensorParallelSize = 55;
|
||||
|
||||
string MMProj = 41;
|
||||
|
||||
|
@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
engine_args.trust_remote_code = request.TrustRemoteCode
|
||||
if request.EnforceEager:
|
||||
engine_args.enforce_eager = request.EnforceEager
|
||||
if request.TensorParallelSize:
|
||||
engine_args.tensor_parallel_size = request.TensorParallelSize
|
||||
if request.SwapSpace != 0:
|
||||
engine_args.swap_space = request.SwapSpace
|
||||
if request.MaxModelLen != 0:
|
||||
|
@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||
EnforceEager: c.EnforceEager,
|
||||
SwapSpace: int32(c.SwapSpace),
|
||||
MaxModelLen: int32(c.MaxModelLen),
|
||||
TensorParallelSize: int32(c.TensorParallelSize),
|
||||
MMProj: c.MMProj,
|
||||
YarnExtFactor: c.YarnExtFactor,
|
||||
YarnAttnFactor: c.YarnAttnFactor,
|
||||
|
@ -140,6 +140,7 @@ type LLMConfig struct {
|
||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||
MMProj string `yaml:"mmproj"`
|
||||
|
||||
RopeScaling string `yaml:"rope_scaling"`
|
||||
|
Loading…
Reference in New Issue
Block a user