mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-05-09 20:13:14 +00:00
rpc : use backend registry, support dl backends (llama/13304)
This commit is contained in:
parent
e1bdd148c5
commit
1e1fa27add
@ -11,24 +11,26 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#ifdef GGML_USE_CPU_HBM
|
#ifdef GGML_USE_CPU_HBM
|
||||||
#include "ggml-cpu-hbm.h"
|
# include "ggml-cpu-hbm.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_CPU_KLEIDIAI
|
#ifdef GGML_USE_CPU_KLEIDIAI
|
||||||
#include "kleidiai/kleidiai.h"
|
# include "kleidiai/kleidiai.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/sysctl.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
#define WIN32_LEAN_AND_MEAN
|
# define WIN32_LEAN_AND_MEAN
|
||||||
#ifndef NOMINMAX
|
# ifndef NOMINMAX
|
||||||
#define NOMINMAX
|
# define NOMINMAX
|
||||||
|
# endif
|
||||||
|
# include <windows.h>
|
||||||
|
#else
|
||||||
|
# include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
#include <windows.h>
|
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
# include <sys/sysctl.h>
|
||||||
|
# include <sys/types.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ggml-backend interface
|
// ggml-backend interface
|
||||||
@ -70,8 +72,10 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_ty
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
|
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
|
||||||
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
|
for (auto * extra : ggml_backend_cpu_get_extra_buffers_type()) {
|
||||||
if (extra && extra == buft) return true;
|
if (extra && extra == buft) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -330,9 +334,18 @@ static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t d
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||||
// TODO
|
#ifdef _WIN32
|
||||||
*free = 0;
|
MEMORYSTATUSEX status;
|
||||||
*total = 0;
|
status.dwLength = sizeof(status);
|
||||||
|
GlobalMemoryStatusEx(&status);
|
||||||
|
*total = status.ullTotalPhys;
|
||||||
|
*free = status.ullAvailPhys;
|
||||||
|
#else
|
||||||
|
long pages = sysconf(_SC_PHYS_PAGES);
|
||||||
|
long page_size = sysconf(_SC_PAGE_SIZE);
|
||||||
|
*total = pages * page_size;
|
||||||
|
*free = *total;
|
||||||
|
#endif
|
||||||
|
|
||||||
GGML_UNUSED(dev);
|
GGML_UNUSED(dev);
|
||||||
}
|
}
|
||||||
|
@ -1594,6 +1594,14 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
|
|||||||
void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint,
|
||||||
const char * cache_dir,
|
const char * cache_dir,
|
||||||
size_t free_mem, size_t total_mem) {
|
size_t free_mem, size_t total_mem) {
|
||||||
|
printf("Starting RPC server v%d.%d.%d\n",
|
||||||
|
RPC_PROTO_MAJOR_VERSION,
|
||||||
|
RPC_PROTO_MINOR_VERSION,
|
||||||
|
RPC_PROTO_PATCH_VERSION);
|
||||||
|
printf(" endpoint : %s\n", endpoint);
|
||||||
|
printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a");
|
||||||
|
printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024));
|
||||||
|
|
||||||
std::string host;
|
std::string host;
|
||||||
int port;
|
int port;
|
||||||
if (!parse_endpoint(endpoint, host, port)) {
|
if (!parse_endpoint(endpoint, host, port)) {
|
||||||
@ -1753,6 +1761,9 @@ static void * ggml_backend_rpc_get_proc_address(ggml_backend_reg_t reg, const ch
|
|||||||
if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) {
|
if (std::strcmp(name, "ggml_backend_rpc_add_device") == 0) {
|
||||||
return (void *)ggml_backend_rpc_add_device;
|
return (void *)ggml_backend_rpc_add_device;
|
||||||
}
|
}
|
||||||
|
if (std::strcmp(name, "ggml_backend_rpc_start_server") == 0) {
|
||||||
|
return (void *)ggml_backend_rpc_start_server;
|
||||||
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
GGML_UNUSED(reg);
|
GGML_UNUSED(reg);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user