mirror of
https://github.com/mudler/LocalAI.git
synced 2024-12-21 05:33:09 +00:00
feat: OpenVINO acceleration for embeddings in transformer backend (#2190)
OpenVINO acceleration for embeddings. New argument type: OVModelForFeatureExtraction.
This commit is contained in:
parent
3754f154ee
commit
e38610e521
@ -153,6 +153,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||||
device=device_map)
|
device=device_map)
|
||||||
self.OV = True
|
self.OV = True
|
||||||
|
elif request.Type == "OVModelForFeatureExtraction":
|
||||||
|
from optimum.intel.openvino import OVModelForFeatureExtraction
|
||||||
|
from openvino.runtime import Core
|
||||||
|
|
||||||
|
if "GPU" in Core().available_devices:
|
||||||
|
device_map="GPU"
|
||||||
|
else:
|
||||||
|
device_map="CPU"
|
||||||
|
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
|
||||||
|
compile=True,
|
||||||
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
|
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
|
||||||
|
export=True,
|
||||||
|
device=device_map)
|
||||||
|
self.OV = True
|
||||||
else:
|
else:
|
||||||
self.model = AutoModel.from_pretrained(model_name,
|
self.model = AutoModel.from_pretrained(model_name,
|
||||||
trust_remote_code=request.TrustRemoteCode,
|
trust_remote_code=request.TrustRemoteCode,
|
||||||
|
Loading…
Reference in New Issue
Block a user