mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-09 12:03:15 +00:00
Add tensor_parallel_size setting to vllm setting items (#2085)
Signed-off-by: Taikono-Himazin <kazu@po.harenet.ne.jp>
This commit is contained in:
parent
b319ed58b0
commit
03adc1f60d
@ -177,6 +177,7 @@ message ModelOptions {
|
|||||||
bool EnforceEager = 52;
|
bool EnforceEager = 52;
|
||||||
int32 SwapSpace = 53;
|
int32 SwapSpace = 53;
|
||||||
int32 MaxModelLen = 54;
|
int32 MaxModelLen = 54;
|
||||||
|
int32 TensorParallelSize = 55;
|
||||||
|
|
||||||
string MMProj = 41;
|
string MMProj = 41;
|
||||||
|
|
||||||
|
@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
engine_args.trust_remote_code = request.TrustRemoteCode
|
engine_args.trust_remote_code = request.TrustRemoteCode
|
||||||
if request.EnforceEager:
|
if request.EnforceEager:
|
||||||
engine_args.enforce_eager = request.EnforceEager
|
engine_args.enforce_eager = request.EnforceEager
|
||||||
|
if request.TensorParallelSize:
|
||||||
|
engine_args.tensor_parallel_size = request.TensorParallelSize
|
||||||
if request.SwapSpace != 0:
|
if request.SwapSpace != 0:
|
||||||
engine_args.swap_space = request.SwapSpace
|
engine_args.swap_space = request.SwapSpace
|
||||||
if request.MaxModelLen != 0:
|
if request.MaxModelLen != 0:
|
||||||
|
@ -74,6 +74,7 @@ func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
EnforceEager: c.EnforceEager,
|
EnforceEager: c.EnforceEager,
|
||||||
SwapSpace: int32(c.SwapSpace),
|
SwapSpace: int32(c.SwapSpace),
|
||||||
MaxModelLen: int32(c.MaxModelLen),
|
MaxModelLen: int32(c.MaxModelLen),
|
||||||
|
TensorParallelSize: int32(c.TensorParallelSize),
|
||||||
MMProj: c.MMProj,
|
MMProj: c.MMProj,
|
||||||
YarnExtFactor: c.YarnExtFactor,
|
YarnExtFactor: c.YarnExtFactor,
|
||||||
YarnAttnFactor: c.YarnAttnFactor,
|
YarnAttnFactor: c.YarnAttnFactor,
|
||||||
|
@ -140,6 +140,7 @@ type LLMConfig struct {
|
|||||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||||
SwapSpace int `yaml:"swap_space"` // vLLM
|
SwapSpace int `yaml:"swap_space"` // vLLM
|
||||||
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
MaxModelLen int `yaml:"max_model_len"` // vLLM
|
||||||
|
TensorParallelSize int `yaml:"tensor_parallel_size"` // vLLM
|
||||||
MMProj string `yaml:"mmproj"`
|
MMProj string `yaml:"mmproj"`
|
||||||
|
|
||||||
RopeScaling string `yaml:"rope_scaling"`
|
RopeScaling string `yaml:"rope_scaling"`
|
||||||
|
Loading…
x
Reference in New Issue
Block a user