From e27c91f6d63f7d94733a36ca212a66a50026d049 Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Fri, 9 May 2025 10:31:07 +0300 Subject: [PATCH] rpc : add rpc_msg_set_tensor_hash_req (llama/13353) * rpc : add rpc_msg_set_tensor_hash_req Use a dedicated struct for the request of RPC_CMD_SET_TENSOR_HASH which makes the code cleaner. * fix --- ggml/src/ggml-rpc/ggml-rpc.cpp | 52 ++++++++++++++++------------------ 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 039214dc..4f0abb5a 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req { uint8_t value; }; +struct rpc_msg_set_tensor_hash_req { + rpc_tensor tensor; + uint64_t offset; + uint64_t hash; +}; + struct rpc_msg_set_tensor_hash_rsp { uint8_t result; }; @@ -548,15 +554,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context; rpc_tensor rpc_tensor = serialize_tensor(tensor); if (size > HASH_THRESHOLD) { - // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) - size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t); - std::vector input(input_size, 0); - uint64_t hash = fnv_hash((const uint8_t*)data, size); - memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor)); - memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset)); - memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash)); + rpc_msg_set_tensor_hash_req request; + request.tensor = rpc_tensor; + request.offset = offset; + request.hash = fnv_hash((const uint8_t*)data, size); rpc_msg_set_tensor_hash_rsp response; - bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response)); + bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response)); GGML_ASSERT(status); if (response.result) { // the server has the same data, no need to send it @@ -864,7 +867,7 @@ public: bool free_buffer(const rpc_msg_free_buffer_req & request); bool buffer_clear(const rpc_msg_buffer_clear_req & request); bool set_tensor(const std::vector & input); - bool set_tensor_hash(const std::vector & input, rpc_msg_set_tensor_hash_rsp & response); + bool set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response); bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector & response); bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response); bool graph_compute(const std::vector & input, rpc_msg_graph_compute_rsp & response); @@ -1101,18 +1104,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector & data) { return true; } -bool rpc_server::set_tensor_hash(const std::vector & input, rpc_msg_set_tensor_hash_rsp & response) +bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response) { - // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) | - if (input.size() != sizeof(rpc_tensor) + 16) { - return false; - } - const rpc_tensor * in_tensor = (const rpc_tensor *)input.data(); - uint64_t offset; - memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset)); - const uint64_t * hash = (const uint64_t *)(input.data() + sizeof(rpc_tensor) + sizeof(offset)); std::vector cached_file; - if (!get_cached_file(*hash, cached_file)) { + if (!get_cached_file(request.hash, cached_file)) { response.result = 0; return true; } @@ -1125,25 +1120,28 @@ bool rpc_server::set_tensor_hash(const std::vector & input, rpc_msg_set ggml_context_ptr ctx_ptr { ggml_init(params) }; GGML_ASSERT(ctx_ptr != nullptr); ggml_context * ctx = ctx_ptr.get(); - ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor); + ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor); if (tensor == nullptr) { GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__); return false; } - GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash); + GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", + __func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash); // sanitize tensor->data { const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer); const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer); - if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) { + if (request.tensor.data + request.offset < p0 + || request.tensor.data + request.offset >= p1 + || size > (p1 - request.tensor.data - request.offset)) { GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n", - __func__, in_tensor->data, offset, size, *hash, p0, p1); + __func__, request.tensor.data, request.offset, size, request.hash, p0, p1); return false; } } - ggml_backend_tensor_set(tensor, cached_file.data(), offset, size); + ggml_backend_tensor_set(tensor, cached_file.data(), request.offset, size); response.result = 1; return true; } @@ -1503,12 +1501,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir, break; } case RPC_CMD_SET_TENSOR_HASH: { - std::vector input; - if (!recv_msg(sockfd, input)) { + rpc_msg_set_tensor_hash_req request; + if (!recv_msg(sockfd, &request, sizeof(request))) { return; } rpc_msg_set_tensor_hash_rsp response; - if (!server.set_tensor_hash(input, response)) { + if (!server.set_tensor_hash(request, response)) { return; } if (!send_msg(sockfd, &response, sizeof(response))) {