From fc7b0e2c289658cc28880290e4138a2bbfcffd7b Mon Sep 17 00:00:00 2001
From: slaren
Date: Wed, 31 Jan 2024 13:43:03 +0100
Subject: [PATCH] ggml : limit n_threads to the max n_tasks (llama/5238)

---
 ggml.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ggml.c b/ggml.c
index f6e797d7..1286ea8e 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16985,12 +16985,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     struct ggml_cplan cplan;
     memset(&cplan, 0, sizeof(struct ggml_cplan));
 
+    int max_tasks = 1;
+
     // thread scheduling for the different operations + work buffer size estimation
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
 
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
+        max_tasks = MAX(max_tasks, n_tasks);
+
         size_t cur = 0;
 
         switch (node->op) {
@@ -17157,7 +17161,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
         work_size += CACHE_LINE_SIZE*(n_threads - 1);
     }
 
-    cplan.n_threads = n_threads;
+    cplan.n_threads = MIN(max_tasks, n_threads);
     cplan.work_size = work_size;
     cplan.work_data = NULL;
 