ggml : allocate contexts on the heap (v2)

2025-06-21 16:09:55 +00:00 · 2024-10-31 12:46:20 +02:00
parent 3689d49b81
commit 987f3145d0
3 changed files with 25 additions and 53 deletions
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@ -217,7 +217,6 @@

 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
 #define GGML_MAX_N_THREADS      512
 #define GGML_MAX_OP_PARAMS      64
@ -657,6 +656,7 @@ extern "C" {
    };

    // scratch buffer
+    // TODO: deprecate and remove
    struct ggml_scratch {
        size_t offs;
        size_t size;
@ -760,8 +760,9 @@ extern "C" {

    // main

-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
+    GGML_API void                  ggml_reset(struct ggml_context * ctx);
+    GGML_API void                  ggml_free (struct ggml_context * ctx);

    GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);

--- a/ggml/src/ggml-metal.m
+++ b/ggml/src/ggml-metal.m
@ -3129,7 +3129,7 @@ static enum ggml_status ggml_metal_graph_compute(

 // default buffer
 static id<MTLDevice> g_backend_device = nil;
-static int g_backend_device_ref_count = 0;
+static int g_backend_device_ref_count = 0; // TODO: make thread-safe

 static id<MTLDevice> ggml_backend_metal_get_device(void) {
    if (g_backend_device == nil) {
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@ -308,6 +308,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
 }

 #define GGML_DEBUG 0
+
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16

@ -1985,7 +1986,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

 struct ggml_context {
    size_t mem_size;
-    void* mem_buffer;
+    void * mem_buffer;
    bool   mem_buffer_owned;
    bool   no_alloc;
    bool   no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@ -3234,7 +3235,6 @@ struct ggml_numa_nodes {
 //

 struct ggml_state {
-    struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
    struct ggml_numa_nodes numa;
 };

@ -3816,17 +3816,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
            const uint64_t t_start = ggml_time_us(); UNUSED(t_start);

            g_state = (struct ggml_state) {
-                /*.contexts =*/ { { 0 } },
                /*.numa =*/ {
                    .n_nodes = 0,
                    .total_cpus = 0,
                },
            };

-            for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) {
-                g_state.contexts[i].used = false;
-            }
-
            const uint64_t t_end = ggml_time_us(); UNUSED(t_end);

            GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
@ -3839,24 +3834,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
        is_first_call = false;
    }

-    // find non-used context in g_state
-    struct ggml_context * ctx = NULL;
-
-    for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
-        if (!g_state.contexts[i].used) {
-            g_state.contexts[i].used = true;
-            ctx = &g_state.contexts[i].context;
-
-            GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
-            break;
-        }
-    }
+    ggml_critical_section_end();

+    struct ggml_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct ggml_context));
    if (ctx == NULL) {
-        GGML_LOG_ERROR("%s: ran out of contexts (max = %d)\n", __func__, GGML_MAX_CONTEXTS);
-
-        ggml_critical_section_end();
-
+        GGML_LOG_ERROR("%s: failed to allocate ggml_context\n", __func__);
        return NULL;
    }

@ -3886,42 +3868,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {

    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);

-    ggml_critical_section_end();
-
    return ctx;
 }

+void ggml_reset(struct ggml_context * ctx) { // clears the context's object bookkeeping and scratch state; frees nothing, mem_buffer is not touched
+    if (ctx == NULL) { // a NULL context is accepted and ignored
+        return;
+    }
+
+    ctx->n_objects     = 0;    // object count back to zero
+    ctx->objects_begin = NULL; // clear the begin/end object pointers
+    ctx->objects_end   = NULL;
+    ctx->scratch       = (struct ggml_scratch) { 0, 0, NULL, }; // zeroed scratch descriptor: offs = 0, size = 0, remaining field NULL
+    ctx->scratch_save  = (struct ggml_scratch) { 0, 0, NULL, }; // same for the saved scratch descriptor
+}
+
 void ggml_free(struct ggml_context * ctx) {
    if (ctx == NULL) {
        return;
    }

-    // make this function thread safe
-    ggml_critical_section_start();
-
-    bool found = false;
-
-    for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
-        if (&g_state.contexts[i].context == ctx) {
-            g_state.contexts[i].used = false;
-
-            GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
-                    __func__, i, ggml_used_mem(ctx));
-
-            if (ctx->mem_buffer_owned) {
-                GGML_ALIGNED_FREE(ctx->mem_buffer);
-            }
-
-            found = true;
-            break;
-        }
+    if (ctx->mem_buffer_owned) { // release the buffer only when mem_buffer_owned is set
+        GGML_ALIGNED_FREE(ctx->mem_buffer); // must happen before ctx itself is released below
    }

-    if (!found) {
-        GGML_PRINT_DEBUG("%s: context not found\n", __func__);
-    }
-
-    ggml_critical_section_end();
+    GGML_ALIGNED_FREE(ctx); // release the context struct itself, which ggml_init now obtains via GGML_ALIGNED_MALLOC
 }

 size_t ggml_used_mem(const struct ggml_context * ctx) {