ggml : allocate contexts on the heap (v2)

This commit is contained in:
Georgi Gerganov 2024-10-31 12:46:20 +02:00
parent 3689d49b81
commit 987f3145d0
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 25 additions and 53 deletions

View File

@ -217,7 +217,6 @@
#define GGML_MAX_DIMS 4 #define GGML_MAX_DIMS 4
#define GGML_MAX_PARAMS 2048 #define GGML_MAX_PARAMS 2048
#define GGML_MAX_CONTEXTS 64
#define GGML_MAX_SRC 10 #define GGML_MAX_SRC 10
#define GGML_MAX_N_THREADS 512 #define GGML_MAX_N_THREADS 512
#define GGML_MAX_OP_PARAMS 64 #define GGML_MAX_OP_PARAMS 64
@ -657,6 +656,7 @@ extern "C" {
}; };
// scratch buffer // scratch buffer
// TODO: deprecate and remove
struct ggml_scratch { struct ggml_scratch {
size_t offs; size_t offs;
size_t size; size_t size;
@ -761,6 +761,7 @@ extern "C" {
// main // main
GGML_API struct ggml_context * ggml_init (struct ggml_init_params params); GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
GGML_API void ggml_reset(struct ggml_context * ctx);
GGML_API void ggml_free (struct ggml_context * ctx); GGML_API void ggml_free (struct ggml_context * ctx);
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx); GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);

View File

@ -3129,7 +3129,7 @@ static enum ggml_status ggml_metal_graph_compute(
// default buffer // default buffer
static id<MTLDevice> g_backend_device = nil; static id<MTLDevice> g_backend_device = nil;
static int g_backend_device_ref_count = 0; static int g_backend_device_ref_count = 0; // TODO: make thread-safe
static id<MTLDevice> ggml_backend_metal_get_device(void) { static id<MTLDevice> ggml_backend_metal_get_device(void) {
if (g_backend_device == nil) { if (g_backend_device == nil) {

View File

@ -308,6 +308,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
} }
#define GGML_DEBUG 0 #define GGML_DEBUG 0
#define GGML_GELU_FP16 #define GGML_GELU_FP16
#define GGML_GELU_QUICK_FP16 #define GGML_GELU_QUICK_FP16
@ -3234,7 +3235,6 @@ struct ggml_numa_nodes {
// //
struct ggml_state { struct ggml_state {
struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
struct ggml_numa_nodes numa; struct ggml_numa_nodes numa;
}; };
@ -3816,17 +3816,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
const uint64_t t_start = ggml_time_us(); UNUSED(t_start); const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
g_state = (struct ggml_state) { g_state = (struct ggml_state) {
/*.contexts =*/ { { 0 } },
/*.numa =*/ { /*.numa =*/ {
.n_nodes = 0, .n_nodes = 0,
.total_cpus = 0, .total_cpus = 0,
}, },
}; };
for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) {
g_state.contexts[i].used = false;
}
const uint64_t t_end = ggml_time_us(); UNUSED(t_end); const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
@ -3839,24 +3834,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
is_first_call = false; is_first_call = false;
} }
// find non-used context in g_state
struct ggml_context * ctx = NULL;
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
if (!g_state.contexts[i].used) {
g_state.contexts[i].used = true;
ctx = &g_state.contexts[i].context;
GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
break;
}
}
if (ctx == NULL) {
GGML_LOG_ERROR("%s: ran out of contexts (max = %d)\n", __func__, GGML_MAX_CONTEXTS);
ggml_critical_section_end(); ggml_critical_section_end();
struct ggml_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct ggml_context));
if (ctx == NULL) {
GGML_LOG_ERROR("%s: failed to allocate ggml_context\n", __func__);
return NULL; return NULL;
} }
@ -3886,42 +3868,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_PRINT_DEBUG("%s: context initialized\n", __func__); GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
ggml_critical_section_end();
return ctx; return ctx;
} }
// Clear the bookkeeping fields of a context.
//
// Sets the object count to zero, sets the begin/end object pointers to NULL
// and zeroes both scratch descriptors (scratch and scratch_save). No other
// field of the context is modified and nothing is freed here.
//
// ctx may be NULL, in which case the call does nothing.
void ggml_reset(struct ggml_context * ctx) {
    if (ctx != NULL) {
        // both scratch descriptors go back to the all-zero / NULL state
        ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, };
        ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, };

        // drop every reference to previously created objects
        ctx->objects_end   = NULL;
        ctx->objects_begin = NULL;
        ctx->n_objects     = 0;
    }
}
void ggml_free(struct ggml_context * ctx) { void ggml_free(struct ggml_context * ctx) {
if (ctx == NULL) { if (ctx == NULL) {
return; return;
} }
// make this function thread safe
ggml_critical_section_start();
bool found = false;
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
if (&g_state.contexts[i].context == ctx) {
g_state.contexts[i].used = false;
GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
__func__, i, ggml_used_mem(ctx));
if (ctx->mem_buffer_owned) { if (ctx->mem_buffer_owned) {
GGML_ALIGNED_FREE(ctx->mem_buffer); GGML_ALIGNED_FREE(ctx->mem_buffer);
} }
found = true; GGML_ALIGNED_FREE(ctx);
break;
}
}
if (!found) {
GGML_PRINT_DEBUG("%s: context not found\n", __func__);
}
ggml_critical_section_end();
} }
size_t ggml_used_mem(const struct ggml_context * ctx) { size_t ggml_used_mem(const struct ggml_context * ctx) {