mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-20 05:07:54 +00:00
feat: user-defined inference device for CUDA and OpenVINO (#2212)
User-defined inference device configuration via the main_gpu parameter.
This commit is contained in:
parent
6a7a7996bb
commit
4690b534e0
@ -89,8 +89,8 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
quantization = None
|
quantization = None
|
||||||
|
|
||||||
if self.CUDA:
|
if self.CUDA:
|
||||||
if request.Device:
|
if request.MainGPU:
|
||||||
device_map=request.Device
|
device_map=request.MainGPU
|
||||||
else:
|
else:
|
||||||
device_map="cuda:0"
|
device_map="cuda:0"
|
||||||
if request.Quantization == "bnb_4bit":
|
if request.Quantization == "bnb_4bit":
|
||||||
@ -143,28 +143,36 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
from optimum.intel.openvino import OVModelForCausalLM
|
from optimum.intel.openvino import OVModelForCausalLM
|
||||||
from openvino.runtime import Core
|
from openvino.runtime import Core
|
||||||
|
|
||||||
if "GPU" in Core().available_devices:
|
if request.MainGPU:
|
||||||
device_map="GPU"
|
device_map=request.MainGPU
|
||||||
else:
|
else:
|
||||||
device_map="CPU"
|
device_map="AUTO"
|
||||||
|
devices = Core().available_devices
|
||||||
|
if "GPU" in " ".join(devices):
|
||||||
|
device_map="AUTO:GPU"
|
||||||
|
|
||||||
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
self.model = OVModelForCausalLM.from_pretrained(model_name,
|
||||||
compile=True,
|
compile=True,
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT","GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
|
||||||
device=device_map)
|
device=device_map)
|
||||||
self.OV = True
|
self.OV = True
|
||||||
elif request.Type == "OVModelForFeatureExtraction":
|
elif request.Type == "OVModelForFeatureExtraction":
|
||||||
from optimum.intel.openvino import OVModelForFeatureExtraction
|
from optimum.intel.openvino import OVModelForFeatureExtraction
|
||||||
from openvino.runtime import Core
|
from openvino.runtime import Core
|
||||||
|
|
||||||
if "GPU" in Core().available_devices:
|
if request.MainGPU:
|
||||||
device_map="GPU"
|
device_map=request.MainGPU
|
||||||
else:
|
else:
|
||||||
device_map="CPU"
|
device_map="AUTO"
|
||||||
|
devices = Core().available_devices
|
||||||
|
if "GPU" in " ".join(devices):
|
||||||
|
device_map="AUTO:GPU"
|
||||||
|
|
||||||
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
|
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
|
||||||
compile=True,
|
compile=True,
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT", "GPU_DISABLE_WINOGRAD_CONVOLUTION": "YES"},
|
||||||
export=True,
|
export=True,
|
||||||
device=device_map)
|
device=device_map)
|
||||||
self.OV = True
|
self.OV = True
|
||||||
|
Loading…
Reference in New Issue
Block a user