From 4160b930f1f15749d442c70a477714e308d1bbd3 Mon Sep 17 00:00:00 2001 From: Molly Sophia Date: Tue, 6 Aug 2024 15:26:46 +0800 Subject: [PATCH] ggml : add epsilon as a parameter for group_norm (llama/8818) Signed-off-by: Molly Sophia --- ggml/include/ggml.h | 7 ++++--- ggml/src/ggml-cuda/norm.cu | 9 ++++++--- ggml/src/ggml-metal.m | 6 ++---- ggml/src/ggml-sycl/norm.cpp | 9 ++++++--- ggml/src/ggml.c | 19 ++++++++++++------- 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index a9e88e59..15602a96 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1140,16 +1140,17 @@ extern "C" { // group normalize along ne0*ne1*n_groups // used in stable-diffusion - // TODO: eps is hardcoded to 1e-6 for now GGML_API struct ggml_tensor * ggml_group_norm( struct ggml_context * ctx, struct ggml_tensor * a, - int n_groups); + int n_groups, + float eps); GGML_API struct ggml_tensor * ggml_group_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, - int n_groups); + int n_groups, + float eps); // a - x // b - dy diff --git a/ggml/src/ggml-cuda/norm.cu b/ggml/src/ggml-cuda/norm.cu index 30866d51..133e219f 100644 --- a/ggml/src/ggml-cuda/norm.cu +++ b/ggml/src/ggml-cuda/norm.cu @@ -142,8 +142,7 @@ static void norm_f32_cuda(const float * x, float * dst, const int ncols, const i } } -static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const int group_size, const int ne_elements, cudaStream_t stream) { - static const float eps = 1e-6f; +static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const float eps, const int group_size, const int ne_elements, cudaStream_t stream) { if (group_size < 1024) { const dim3 block_dims(WARP_SIZE, 1, 1); group_norm_f32<<>>(x, dst, group_size, ne_elements, eps); @@ -196,8 +195,12 @@ void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst) GGML_ASSERT( dst->type == GGML_TYPE_F32); int num_groups = dst->op_params[0]; + + float eps; + memcpy(&eps, dst->op_params + 1, sizeof(float)); + int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups); - group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], group_size, ggml_nelements(src0), stream); + group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], eps, group_size, ggml_nelements(src0), stream); } void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index 3396f291..9fc08ab3 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -2236,10 +2236,8 @@ static enum ggml_status ggml_metal_graph_compute( GGML_ASSERT(ne00 % 4 == 0); GGML_ASSERT(ggml_is_contiguous(src0)); - //float eps; - //memcpy(&eps, dst->op_params, sizeof(float)); - - const float eps = 1e-6f; // TODO: temporarily hardcoded + float eps; + memcpy(&eps, dst->op_params + 1, sizeof(float)); const int32_t n_groups = ((int32_t *) dst->op_params)[0]; diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp index cccf87d0..b3159b9d 100644 --- a/ggml/src/ggml-sycl/norm.cpp +++ b/ggml/src/ggml-sycl/norm.cpp @@ -225,9 +225,8 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols, } static void group_norm_f32_sycl(const float* x, float* dst, - const int num_groups, const int group_size, + const int num_groups, const float eps, const int group_size, const int ne_elements, queue_ptr stream, int device) { - static const float eps = 1e-6f; if (group_size < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); stream->submit([&](sycl::handler& cgh) { @@ -343,8 +342,12 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* GGML_ASSERT(dst->type == GGML_TYPE_F32); int num_groups = dst->op_params[0]; + + float eps; + memcpy(&eps, dst->op_params + 1, sizeof(float)); + int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups); - group_norm_f32_sycl(src0_dd, dst_dd, num_groups, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device); + group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device); (void)src1; (void)dst; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 2f96e76f..c937b5e5 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -5377,6 +5377,7 @@ static struct ggml_tensor * ggml_group_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups, + float eps, bool inplace) { bool is_node = false; @@ -5387,7 +5388,8 @@ static struct ggml_tensor * ggml_group_norm_impl( struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - result->op_params[0] = n_groups; + ggml_set_op_params_i32(result, 0, n_groups); + ggml_set_op_params_f32(result, 1, eps); result->op = GGML_OP_GROUP_NORM; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; @@ -5399,15 +5401,17 @@ static struct ggml_tensor * ggml_group_norm_impl( struct ggml_tensor * ggml_group_norm( struct ggml_context * ctx, struct ggml_tensor * a, - int n_groups) { - return ggml_group_norm_impl(ctx, a, n_groups, false); + int n_groups, + float eps) { + return ggml_group_norm_impl(ctx, a, n_groups, eps, false); } struct ggml_tensor * ggml_group_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, - int n_groups) { - return ggml_group_norm_impl(ctx, a, n_groups, true); + int n_groups, + float eps) { + return ggml_group_norm_impl(ctx, a, n_groups, eps, true); } // ggml_mul_mat @@ -12098,10 +12102,11 @@ static void ggml_compute_forward_group_norm_f32( GGML_TENSOR_UNARY_OP_LOCALS - const float eps = 1e-6f; // TODO: make this a parameter - // TODO: optimize + float eps; + memcpy(&eps, dst->op_params + 1, sizeof(float)); + int n_channels = src0->ne[2]; int n_groups = dst->op_params[0]; int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;