mirror of
https://github.com/mudler/LocalAI.git
synced 2025-05-11 21:12:58 +00:00
chore: bump grpc limits to 50MB (#5212)
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-ffmpeg) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 11, 7, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 12, 0, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-ffmpeg-core) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Some checks failed
Explorer deployment / build-linux (push) Has been cancelled
GPU tests / ubuntu-latest (1.21.x) (push) Has been cancelled
generate and publish intel docker caches / generate_caches (intel/oneapi-basekit:2025.1.0-0-devel-ubuntu22.04, linux/amd64, ubuntu-latest) (push) Has been cancelled
build container images / hipblas-jobs (-aio-gpu-hipblas, rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, extras, latest-gpu-hipblas, latest-aio-gpu-hipblas, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -hipblas-ffmpeg) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-core) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, false, ubuntu:22.04, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas) (push) Has been cancelled
build container images / hipblas-jobs (rocm/dev-ubuntu-22.04:6.1, hipblas, true, ubuntu:22.04, core, latest-gpu-hipblas-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -hipblas-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f16, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, extras, latest-gpu-intel-f16, latest-aio-gpu-intel-f16, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f16-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-intel-f32, quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, extras, latest-gpu-intel-f32, latest-aio-gpu-intel-f32, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -sycl-f32-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-11, ubuntu:22.04, cublas, 11, 7, true, extras, latest-gpu-nvidia-cuda-11, latest-aio-gpu-nvidia-cuda-11, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda11-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (-aio-gpu-nvidia-cuda-12, ubuntu:22.04, cublas, 12, 0, true, extras, latest-gpu-nvidia-cuda-12, latest-aio-gpu-nvidia-cuda-12, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -cublas-cuda12-ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f16, true, ubuntu:22.04, core, latest-gpu-intel-f16-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f16-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, false, ubuntu:22.04, core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-core) (push) Has been cancelled
build container images / self-hosted-jobs (quay.io/go-skynet/intel-oneapi-base:latest, sycl_f32, true, ubuntu:22.04, core, latest-gpu-intel-f32-core, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -sycl-f32-ffmpeg-core) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, ) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, , true, extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, auto, -ffmpeg) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 11, 7, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda11) (push) Has been cancelled
build container images / self-hosted-jobs (ubuntu:22.04, cublas, 12, 0, , extras, --jobs=3 --output-sync=target, linux/amd64, arc-runner-set, false, -cublas-cuda12) (push) Has been cancelled
build container images / core-image-build (-aio-cpu, ubuntu:22.04, , true, core, latest-cpu, latest-aio-cpu, --jobs=4 --output-sync=target, linux/amd64,linux/arm64, arc-runner-set, false, auto, -ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 11, 7, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda11-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, , core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, cublas, 12, 0, true, core, latest-gpu-nvidia-cuda-12-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -cublas-cuda12-ffmpeg-core) (push) Has been cancelled
build container images / core-image-build (ubuntu:22.04, vulkan, true, core, latest-gpu-vulkan-core, --jobs=4 --output-sync=target, linux/amd64, arc-runner-set, false, false, -vulkan-ffmpeg-core) (push) Has been cancelled
build container images / gh-runner (nvcr.io/nvidia/l4t-jetpack:r36.4.0, cublas, 12, 0, true, core, latest-nvidia-l4t-arm64-core, --jobs=4 --output-sync=target, linux/arm64, ubuntu-24.04-arm, true, false, -nvidia-l4t-arm64-core) (push) Has been cancelled
Security Scan / tests (push) Has been cancelled
Tests extras backends / tests-transformers (push) Has been cancelled
Tests extras backends / tests-rerankers (push) Has been cancelled
Tests extras backends / tests-diffusers (push) Has been cancelled
Tests extras backends / tests-coqui (push) Has been cancelled
tests / tests-linux (1.21.x) (push) Has been cancelled
tests / tests-aio-container (push) Has been cancelled
tests / tests-apple (1.21.x) (push) Has been cancelled
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
parent
8b3f76d8e6
commit
8abecb4a18
@ -2644,7 +2644,9 @@ void RunServer(const std::string& server_address) {
|
|||||||
ServerBuilder builder;
|
ServerBuilder builder;
|
||||||
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
|
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
|
||||||
builder.RegisterService(&service);
|
builder.RegisterService(&service);
|
||||||
|
builder.SetMaxMessageSize(50 * 1024 * 1024); // 50MB
|
||||||
|
builder.SetMaxSendMessageSize(50 * 1024 * 1024); // 50MB
|
||||||
|
builder.SetMaxReceiveMessageSize(50 * 1024 * 1024); // 50MB
|
||||||
std::unique_ptr<Server> server(builder.BuildAndStart());
|
std::unique_ptr<Server> server(builder.BuildAndStart());
|
||||||
std::cout << "Server listening on " << server_address << std::endl;
|
std::cout << "Server listening on " << server_address << std::endl;
|
||||||
server->Wait();
|
server->Wait();
|
||||||
|
@ -121,7 +121,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return (prompt, image_paths)
|
return (prompt, image_paths)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -61,7 +61,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -86,7 +86,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -522,7 +522,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -105,7 +105,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -62,7 +62,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
return backend_pb2.TranscriptResult(segments=resultSegments, text=text)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -99,7 +99,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.Result(success=True)
|
return backend_pb2.Result(success=True)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -91,7 +91,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
return backend_pb2.RerankResult(usage=usage, results=results)
|
return backend_pb2.RerankResult(usage=usage, results=results)
|
||||||
|
|
||||||
def serve(address):
|
def serve(address):
|
||||||
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
server.add_insecure_port(address)
|
server.add_insecure_port(address)
|
||||||
server.start()
|
server.start()
|
||||||
|
@ -559,7 +559,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
async def serve(address):
|
async def serve(address):
|
||||||
# Start asyncio gRPC server
|
# Start asyncio gRPC server
|
||||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
# Add the servicer to the server
|
# Add the servicer to the server
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
# Bind the server to the address
|
# Bind the server to the address
|
||||||
|
@ -320,7 +320,12 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
|
|
||||||
async def serve(address):
|
async def serve(address):
|
||||||
# Start asyncio gRPC server
|
# Start asyncio gRPC server
|
||||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
|
||||||
|
options=[
|
||||||
|
('grpc.max_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_send_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
('grpc.max_receive_message_length', 50 * 1024 * 1024), # 50MB
|
||||||
|
])
|
||||||
# Add the servicer to the server
|
# Add the servicer to the server
|
||||||
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
||||||
# Bind the server to the address
|
# Bind the server to the address
|
||||||
|
@ -57,7 +57,11 @@ func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
|
|||||||
}
|
}
|
||||||
c.setBusy(true)
|
c.setBusy(true)
|
||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
@ -89,7 +93,11 @@ func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -108,7 +116,11 @@ func (c *Client) Predict(ctx context.Context, in *pb.PredictOptions, opts ...grp
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -127,7 +139,11 @@ func (c *Client) LoadModel(ctx context.Context, in *pb.ModelOptions, opts ...grp
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -145,7 +161,11 @@ func (c *Client) PredictStream(ctx context.Context, in *pb.PredictOptions, f fun
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -182,7 +202,11 @@ func (c *Client) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest,
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -200,7 +224,11 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -218,7 +246,11 @@ func (c *Client) SoundGeneration(ctx context.Context, in *pb.SoundGenerationRequ
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -236,7 +268,11 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -254,7 +290,11 @@ func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -276,7 +316,11 @@ func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
|
|||||||
}
|
}
|
||||||
c.setBusy(true)
|
c.setBusy(true)
|
||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -294,7 +338,11 @@ func (c *Client) StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ..
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -312,7 +360,11 @@ func (c *Client) StoresDelete(ctx context.Context, in *pb.StoresDeleteOptions, o
|
|||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
c.setBusy(true)
|
c.setBusy(true)
|
||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -330,7 +382,11 @@ func (c *Client) StoresGet(ctx context.Context, in *pb.StoresGetOptions, opts ..
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -348,7 +404,11 @@ func (c *Client) StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -366,7 +426,11 @@ func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -384,7 +448,11 @@ func (c *Client) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opt
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -402,7 +470,11 @@ func (c *Client) VAD(ctx context.Context, in *pb.VADRequest, opts ...grpc.CallOp
|
|||||||
defer c.setBusy(false)
|
defer c.setBusy(false)
|
||||||
c.wdMark()
|
c.wdMark()
|
||||||
defer c.wdUnMark()
|
defer c.wdUnMark()
|
||||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()),
|
||||||
|
grpc.WithDefaultCallOptions(
|
||||||
|
grpc.MaxCallRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxCallSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -244,7 +244,10 @@ func StartServer(address string, model LLM) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
s := grpc.NewServer()
|
s := grpc.NewServer(
|
||||||
|
grpc.MaxRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
)
|
||||||
pb.RegisterBackendServer(s, &server{llm: model})
|
pb.RegisterBackendServer(s, &server{llm: model})
|
||||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||||
if err := s.Serve(lis); err != nil {
|
if err := s.Serve(lis); err != nil {
|
||||||
@ -259,7 +262,10 @@ func RunServer(address string, model LLM) (func() error, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
s := grpc.NewServer()
|
s := grpc.NewServer(
|
||||||
|
grpc.MaxRecvMsgSize(50*1024*1024), // 50MB
|
||||||
|
grpc.MaxSendMsgSize(50*1024*1024), // 50MB
|
||||||
|
)
|
||||||
pb.RegisterBackendServer(s, &server{llm: model})
|
pb.RegisterBackendServer(s, &server{llm: model})
|
||||||
log.Printf("gRPC Server listening at %v", lis.Addr())
|
log.Printf("gRPC Server listening at %v", lis.Addr())
|
||||||
if err = s.Serve(lis); err != nil {
|
if err = s.Serve(lis); err != nil {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user