bugfix: CUDA acceleration not working (#2475)

* bugfix: CUDA acceleration not working

CUDA not working after #2286.
Refactored the code to be more polish

* Update requirements.txt

Missing imports

Signed-off-by: fakezeta <fakezeta@gmail.com>

* Update requirements.txt

Signed-off-by: fakezeta <fakezeta@gmail.com>

---------

Signed-off-by: fakezeta <fakezeta@gmail.com>
This commit is contained in:
fakezeta 2024-06-03 22:41:42 +02:00 committed by GitHub
parent daa7544d9c
commit 6ef78ef7f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 8 deletions

View File

@ -21,10 +21,7 @@ import torch.cuda
XPU=os.environ.get("XPU", "0") == "1"
if XPU:
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
else:
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@ -77,11 +74,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
"""
model_name = request.Model
compute = "auto"
compute = torch.float16
if request.F16Memory == True:
compute=torch.bfloat16
self.CUDA = request.CUDA
self.CUDA = torch.cuda.is_available()
self.OV=False
device_map="cpu"
@ -89,6 +86,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
quantization = None
if self.CUDA:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM
if request.MainGPU:
device_map=request.MainGPU
else:
@ -107,7 +105,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
bnb_4bit_compute_dtype = None,
load_in_8bit=True,
)
try:
if request.Type == "AutoModelForCausalLM":
if XPU:
@ -189,6 +187,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
device=device_map)
self.OV = True
else:
print("Automodel", file=sys.stderr)
self.model = AutoModel.from_pretrained(model_name,
trust_remote_code=request.TrustRemoteCode,
use_safetensors=True,

View File

@ -3,4 +3,7 @@ transformers
grpcio==1.64.0
protobuf
torch
certifi
certifi
intel-extension-for-transformers
bitsandbytes
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@ -1,4 +1,10 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh
if [ -d "/opt/intel" ]; then
# Assumes we are using the Intel oneAPI container image
# https://github.com/intel/intel-extension-for-pytorch/issues/538
export XPU=1
fi
startBackend $@