Mirror of https://github.com/mudler/LocalAI.git, synced 2024-12-18 20:27:57 +00:00
bugfix: CUDA acceleration not working (#2475)

* bugfix: CUDA acceleration not working

  CUDA acceleration stopped working after #2286. Refactored the code to be more polished.

* Update requirements.txt: missing imports

Signed-off-by: fakezeta <fakezeta@gmail.com>
This commit is contained in:
parent daa7544d9c · commit 6ef78ef7f6
Transformers backend server (Python):

@@ -21,10 +21,7 @@ import torch.cuda

 XPU=os.environ.get("XPU", "0") == "1"
-if XPU:
-    from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
-else:
-    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria

 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
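This hunk collapses the XPU-conditional import into one unconditional import of the symbols both paths share; the CUDA-only names (AutoModelForCausalLM, BitsAndBytesConfig) move into the CUDA branch further down. A minimal sketch of that deferred-import pattern, assuming a hypothetical load_causal_lm helper; only the import layout mirrors the diff:

import os

# Shared symbols are imported unconditionally, as in the fixed module.
from transformers import AutoTokenizer, AutoModel, set_seed

XPU = os.environ.get("XPU", "0") == "1"

def load_causal_lm(model_name):
    # CUDA-only helpers are imported lazily, so an XPU-only install
    # that ships without bitsandbytes never has to import them.
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig
    quantization = BitsAndBytesConfig(load_in_8bit=True)
    return AutoModelForCausalLM.from_pretrained(
        model_name, quantization_config=quantization
    )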
@@ -77,11 +74,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         """
         model_name = request.Model

-        compute = "auto"
+        compute = torch.float16
         if request.F16Memory == True:
             compute=torch.bfloat16

-        self.CUDA = request.CUDA
+        self.CUDA = torch.cuda.is_available()
         self.OV=False

         device_map="cpu"
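With this hunk the backend decides at runtime whether CUDA is usable, via torch.cuda.is_available(), instead of trusting the request flag that stopped arriving after #2286, and defaults compute to float16 rather than "auto". A minimal sketch of the same decision, assuming a hypothetical pick_compute helper; F16Memory and MainGPU are fields from the diff, the device string is illustrative:

import torch

def pick_compute(f16_memory):
    # float16 by default; bfloat16 when the request sets F16Memory
    return torch.bfloat16 if f16_memory else torch.float16

cuda = torch.cuda.is_available()          # trust the runtime, not the caller
device_map = "cuda:0" if cuda else "cpu"  # illustrative; the real code also honors request.MainGPU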
@@ -89,6 +86,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         quantization = None

         if self.CUDA:
+            from transformers import BitsAndBytesConfig, AutoModelForCausalLM
             if request.MainGPU:
                 device_map=request.MainGPU
             else:
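BitsAndBytesConfig and AutoModelForCausalLM are now imported only inside the CUDA branch, so environments without bitsandbytes (e.g. the Intel XPU image) never import them. A sketch of that pattern with a hypothetical make_quantization helper; the 4-bit/8-bit parameters are illustrative, not the backend's exact settings:

import torch

def make_quantization(bits):
    # bitsandbytes is CUDA-only, so the import lives inside the CUDA path
    from transformers import BitsAndBytesConfig
    if bits == 4:
        return BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    return BitsAndBytesConfig(load_in_8bit=True)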
@@ -107,7 +105,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                     bnb_4bit_compute_dtype = None,
                     load_in_8bit=True,
                 )
-
+
         try:
             if request.Type == "AutoModelForCausalLM":
                 if XPU:
@@ -189,6 +187,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                           device=device_map)
                 self.OV = True
             else:
+                print("Automodel", file=sys.stderr)
                 self.model = AutoModel.from_pretrained(model_name,
                                                        trust_remote_code=request.TrustRemoteCode,
                                                        use_safetensors=True,
requirements.txt:

@@ -3,4 +3,7 @@ transformers
 grpcio==1.64.0
 protobuf
 torch
 certifi
+intel-extension-for-transformers
+bitsandbytes
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
Backend launch script (bash):

@@ -1,4 +1,10 @@
 #!/bin/bash
 source $(dirname $0)/../common/libbackend.sh

+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
 startBackend $@
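For context, the XPU=1 exported here is what the Python server reads in the first hunk's os.environ.get check; a minimal sketch of the consuming side (the log line is illustrative):

import os, sys

XPU = os.environ.get("XPU", "0") == "1"  # set by the launch script on oneAPI images
if XPU:
    print("Intel XPU mode enabled", file=sys.stderr)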