From 55bcd62a4be5df37429411e8065d5665122996c6 Mon Sep 17 00:00:00 2001 From: "Dr. Tom Murphy VII Ph.D" <499244+tom7@users.noreply.github.com> Date: Mon, 5 Feb 2024 06:13:57 -0500 Subject: [PATCH] ggml : avoid duplicating function calls using MIN/MAX macros (llama/5325) * Avoid duplicating function calls when using MIN/MAX macros. Since these copy "a" and "b" they ask the compiler to evaluate one of them twice. The compiler doesn't have a problem with removing the duplication in something like MAX(0, x + 2), but in some cases we're calling functions, and those calls just happen twice. By explicitly evaluating at the expression we get smaller and faster code without duplicate calls. See ggml_rope_yarn_corr_dims in Compiler Explorer: https://godbolt.org/z/Ee4KMrvKh Code behaves exactly the same. * Update ggml.c --------- Co-authored-by: Georgi Gerganov --- ggml.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ggml.c b/ggml.c index 6dc9a525..d0e95bb6 100644 --- a/ggml.c +++ b/ggml.c @@ -2470,7 +2470,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { size_t max_size = 0; for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { - max_size = MAX(max_size, ggml_nbytes(tensor)); + size_t bytes = ggml_nbytes(tensor); + max_size = MAX(max_size, bytes); } return max_size; @@ -11887,8 +11888,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims( int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2] ) { // start and end correction dims - dims[0] = MAX(0, floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base))); - dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base))); + float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)); + float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)); + dims[0] = MAX(0, start); + dims[1] = MIN(n_dims - 1, end); } static void ggml_compute_forward_rope_f32(