mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-06 18:48:24 +00:00
feat(vllm): expose 'load_format' (#3943)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
c75ecfa009
commit
ae1ec4e096
@@ -95,6 +95,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
if request.Quantization != "":
|
if request.Quantization != "":
|
||||||
engine_args.quantization = request.Quantization
|
engine_args.quantization = request.Quantization
|
||||||
|
if request.LoadFormat != "":
|
||||||
|
engine_args.load_format = request.LoadFormat
|
||||||
if request.GPUMemoryUtilization != 0:
|
if request.GPUMemoryUtilization != 0:
|
||||||
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
|
engine_args.gpu_memory_utilization = request.GPUMemoryUtilization
|
||||||
if request.TrustRemoteCode:
|
if request.TrustRemoteCode:
|
||||||
|
@@ -139,6 +139,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
DraftModel: c.DraftModel,
|
DraftModel: c.DraftModel,
|
||||||
AudioPath: c.VallE.AudioPath,
|
AudioPath: c.VallE.AudioPath,
|
||||||
Quantization: c.Quantization,
|
Quantization: c.Quantization,
|
||||||
|
LoadFormat: c.LoadFormat,
|
||||||
GPUMemoryUtilization: c.GPUMemoryUtilization,
|
GPUMemoryUtilization: c.GPUMemoryUtilization,
|
||||||
TrustRemoteCode: c.TrustRemoteCode,
|
TrustRemoteCode: c.TrustRemoteCode,
|
||||||
EnforceEager: c.EnforceEager,
|
EnforceEager: c.EnforceEager,
|
||||||
|
@@ -143,6 +143,7 @@ type LLMConfig struct {
|
|||||||
DraftModel string `yaml:"draft_model"`
|
DraftModel string `yaml:"draft_model"`
|
||||||
NDraft int32 `yaml:"n_draft"`
|
NDraft int32 `yaml:"n_draft"`
|
||||||
Quantization string `yaml:"quantization"`
|
Quantization string `yaml:"quantization"`
|
||||||
|
LoadFormat string `yaml:"load_format"`
|
||||||
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization"` // vLLM
|
||||||
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
TrustRemoteCode bool `yaml:"trust_remote_code"` // vLLM
|
||||||
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
EnforceEager bool `yaml:"enforce_eager"` // vLLM
|
||||||
|
Loading…
x
Reference in New Issue
Block a user