feat: OpenVINO acceleration for embeddings in transformer backend (#2190)

OpenVINO acceleration for embeddings

New model type: OVModelForFeatureExtraction (selected by setting the request's Type to "OVModelForFeatureExtraction")
This commit is contained in:
fakezeta 2024-04-30 10:13:04 +02:00 committed by GitHub
parent 3754f154ee
commit e38610e521
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"}, ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
device=device_map) device=device_map)
self.OV = True self.OV = True
elif request.Type == "OVModelForFeatureExtraction":
from optimum.intel.openvino import OVModelForFeatureExtraction
from openvino.runtime import Core
if "GPU" in Core().available_devices:
device_map="GPU"
else:
device_map="CPU"
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
compile=True,
trust_remote_code=request.TrustRemoteCode,
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
export=True,
device=device_map)
self.OV = True
else: else:
self.model = AutoModel.from_pretrained(model_name, self.model = AutoModel.from_pretrained(model_name,
trust_remote_code=request.TrustRemoteCode, trust_remote_code=request.TrustRemoteCode,