rpc : use backend registry, support dl backends (llama/13304)

This commit is contained in:
Diego Devesa 2025-05-04 21:25:43 +02:00 committed by Georgi Gerganov
parent e1bdd148c5
commit 1e1fa27add
2 changed files with 40 additions and 16 deletions

View File

@ -11,24 +11,26 @@
#include <vector> #include <vector>
#ifdef GGML_USE_CPU_HBM #ifdef GGML_USE_CPU_HBM
#include "ggml-cpu-hbm.h" # include "ggml-cpu-hbm.h"
#endif #endif
#ifdef GGML_USE_CPU_KLEIDIAI #ifdef GGML_USE_CPU_KLEIDIAI
#include "kleidiai/kleidiai.h" # include "kleidiai/kleidiai.h"
#endif
#if defined(__APPLE__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif #endif
#if defined(_WIN32) #if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX # ifndef NOMINMAX
#define NOMINMAX # define NOMINMAX
# endif
# include <windows.h>
#else
# include <unistd.h>
#endif #endif
#include <windows.h>
#if defined(__APPLE__)
# include <sys/sysctl.h>
# include <sys/types.h>
#endif #endif
// ggml-backend interface // ggml-backend interface
@ -70,8 +72,10 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_ty
} }
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) { static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) { for (auto * extra : ggml_backend_cpu_get_extra_buffers_type()) {
if (extra && extra == buft) return true; if (extra && extra == buft) {
return true;
}
} }
return false; return false;
} }
@ -330,9 +334,18 @@ static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t d
} }
static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
// TODO #ifdef _WIN32
*free = 0; MEMORYSTATUSEX status;
*total = 0; status.dwLength = sizeof(status);
GlobalMemoryStatusEx(&status);
*total = status.ullTotalPhys;
*free = status.ullAvailPhys;
#else
long pages = sysconf(_SC_PHYS_PAGES);
long page_size = sysconf(_SC_PAGE_SIZE);
*total = pages * page_size;
*free = *total;
#endif
GGML_UNUSED(dev); GGML_UNUSED(dev);
} }

View File

@ -1594,6 +1594,14 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
const char * cache_dir, const char * cache_dir,
size_t free_mem, size_t total_mem) { size_t free_mem, size_t total_mem) {
printf("Starting RPC server v%d.%d.%d\n",
RPC_PROTO_MAJOR_VERSION,
RPC_PROTO_MINOR_VERSION,
RPC_PROTO_PATCH_VERSION);
printf(" endpoint : %s\n", endpoint);
printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a");
printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024));
std::string host; std::string host;
int port; int port;
if (!parse_endpoint(endpoint, host, port)) { if (!parse_endpoint(endpoint, host, port)) {
@ -1753,6 +1761,9 @@ static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const ch
if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) { if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) {
return (void *)ggml_backend_rpc_add_device; return (void *)ggml_backend_rpc_add_device;
} }
if (std::strcmp(name, "ggml_backend_rpc_start_server") == 0) {
return (void *)ggml_backend_rpc_start_server;
}
return NULL; return NULL;
GGML_UNUSED(reg); GGML_UNUSED(reg);