mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-18 20:27:57 +00:00
feat: OpenVINO acceleration for embeddings in transformer backend (#2190)
OpenVINO acceleration for embeddings New argument type: OVModelForFeatureExtraction
This commit is contained in:
parent
3754f154ee
commit
e38610e521
@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
elif request.Type == "OVModelForFeatureExtraction":
|
||||
from optimum.intel.openvino import OVModelForFeatureExtraction
|
||||
from openvino.runtime import Core
|
||||
|
||||
if "GPU" in Core().available_devices:
|
||||
device_map="GPU"
|
||||
else:
|
||||
device_map="CPU"
|
||||
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
|
||||
compile=True,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||
export=True,
|
||||
device=device_map)
|
||||
self.OV = True
|
||||
else:
|
||||
self.model = AutoModel.from_pretrained(model_name,
|
||||
trust_remote_code=request.TrustRemoteCode,
|
||||
|
Loading…
Reference in New Issue
Block a user