diff --git a/backend/python/autogptq/autogptq.py b/backend/python/autogptq/autogptq.py
index db44f507..ffb37569 100755
--- a/backend/python/autogptq/autogptq.py
+++ b/backend/python/autogptq/autogptq.py
@@ -33,7 +33,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         model = AutoGPTQForCausalLM.from_quantized(request.Model,
                 model_basename=request.ModelBaseName,
                 use_safetensors=True,
-                trust_remote_code=True,
+                trust_remote_code=request.TrustRemoteCode,
                 device=device,
                 use_triton=request.UseTriton,
                 quantize_config=None)
diff --git a/backend/python/transformers/transformers_server.py b/backend/python/transformers/transformers_server.py
index 1b177057..fe0b815a 100755
--- a/backend/python/transformers/transformers_server.py
+++ b/backend/python/transformers/transformers_server.py
@@ -69,9 +69,9 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         model_name = request.Model
         try:
             if request.Type == "AutoModelForCausalLM":
-                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+                self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
             else:
-                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+                self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode)
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
             self.CUDA = False
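
For context, a minimal usage sketch of the per-request flag this patch threads through both backends. The field name TrustRemoteCode and the fields Model and Type are taken from the diff above; the generated module name backend_pb2 and the message name ModelOptions are assumptions based on the backend_pb2_grpc.BackendServicer reference, as the proto change itself is not shown in this patch.

# Hypothetical sketch, not part of this patch: module and message names assumed.
import backend_pb2

# Before this change, both backends hard-coded trust_remote_code=True, so any
# custom modeling code shipped inside a model repository would be executed.
# With the change, the caller decides per request:
opts = backend_pb2.ModelOptions(
    Model="some/model-id",        # placeholder model identifier
    Type="AutoModelForCausalLM",
    TrustRemoteCode=False,        # remote code now runs only on explicit opt-in
)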