ggml : add epsilon as a parameter for group_norm (llama/8818)

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Commit: 4160b930f1
Parent: 7a96e661e4
Author: Molly Sophia <mollysophia379@gmail.com>
Date: 2024-08-06 15:26:46 +08:00
Committed by: Georgi Gerganov

5 changed files with 30 additions and 20 deletions


@@ -1140,16 +1140,17 @@ extern "C" {
     // group normalize along ne0*ne1*n_groups
     // used in stable-diffusion
-    // TODO: eps is hardcoded to 1e-6 for now
     GGML_API struct ggml_tensor * ggml_group_norm(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);

     GGML_API struct ggml_tensor * ggml_group_norm_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            int                   n_groups);
+            int                   n_groups,
+            float                 eps);

     // a - x
     // b - dy

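For callers, the change is mechanical: the epsilon that used to be hardcoded to 1e-6f inside ggml is now an explicit argument. A minimal usage sketch follows; the context ctx, the tensor cur, and the choice of 32 groups are illustrative, not part of this commit:

    // before this commit: eps was implicitly 1e-6f
    // cur = ggml_group_norm(ctx, cur, 32);

    // after this commit: eps is explicit; 1e-6f preserves the old behavior
    cur = ggml_group_norm(ctx, cur, 32, 1e-6f);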

@@ -142,8 +142,7 @@ static void norm_f32_cuda(const float * x, float * dst, const int ncols, const i
     }
 }

-static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const int group_size, const int ne_elements, cudaStream_t stream) {
-    static const float eps = 1e-6f;
+static void group_norm_f32_cuda(const float * x, float * dst, const int num_groups, const float eps, const int group_size, const int ne_elements, cudaStream_t stream) {
     if (group_size < 1024) {
         const dim3 block_dims(WARP_SIZE, 1, 1);
         group_norm_f32<WARP_SIZE><<<num_groups, block_dims, 0, stream>>>(x, dst, group_size, ne_elements, eps);
@@ -196,8 +195,12 @@ void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
     GGML_ASSERT( dst->type == GGML_TYPE_F32);

     int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], group_size, ggml_nelements(src0), stream);
+    group_norm_f32_cuda(src0_d, dst_d, num_groups * src0->ne[3], eps, group_size, ggml_nelements(src0), stream);
 }

 void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

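Note the read pattern used here and in the other backends below: dst->op_params is an array of 32-bit slots, with n_groups stored as an int32 in slot 0 and eps bit-copied as a float into slot 1, so the float is recovered with memcpy rather than a pointer cast (which would violate strict aliasing). A standalone round-trip sketch of that packing, independent of ggml:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void) {
        int32_t op_params[2] = {0};

        // write side: what ggml_set_op_params_i32/_f32 do in ggml_group_norm_impl
        const int32_t n_groups = 32;
        const float   eps      = 1e-6f;
        op_params[0] = n_groups;
        memcpy(&op_params[1], &eps, sizeof(float));

        // read side: what the CPU, CUDA, SYCL and Metal paths in this commit do
        float eps_out;
        memcpy(&eps_out, op_params + 1, sizeof(float));

        assert(op_params[0] == 32);
        assert(eps_out == 1e-6f);
        return 0;
    }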

@@ -2236,10 +2236,8 @@ static enum ggml_status ggml_metal_graph_compute(
                 GGML_ASSERT(ne00 % 4 == 0);
                 GGML_ASSERT(ggml_is_contiguous(src0));

-                //float eps;
-                //memcpy(&eps, dst->op_params, sizeof(float));
-
-                const float eps = 1e-6f; // TODO: temporarily hardcoded
+                float eps;
+                memcpy(&eps, dst->op_params + 1, sizeof(float));

                 const int32_t n_groups = ((int32_t *) dst->op_params)[0];


@@ -225,9 +225,8 @@ static void norm_f32_sycl(const float* x, float* dst, const int ncols,
     }
 }

 static void group_norm_f32_sycl(const float* x, float* dst,
-                                const int num_groups, const int group_size,
+                                const int num_groups, const float eps, const int group_size,
                                 const int ne_elements, queue_ptr stream, int device) {
-    static const float eps = 1e-6f;
     if (group_size < 1024) {
         const sycl::range<3> block_dims(1, 1, WARP_SIZE);
         stream->submit([&](sycl::handler& cgh) {
@@ -343,8 +342,12 @@ void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor*
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     int num_groups = dst->op_params[0];
+
+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups);
-    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);
+    group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device);

     (void)src1;
     (void)dst;


@@ -5377,6 +5377,7 @@ static struct ggml_tensor * ggml_group_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         int                   n_groups,
+        float                 eps,
         bool                  inplace) {

     bool is_node = false;
@@ -5387,7 +5388,8 @@ static struct ggml_tensor * ggml_group_norm_impl(

     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-    result->op_params[0] = n_groups;
+    ggml_set_op_params_i32(result, 0, n_groups);
+    ggml_set_op_params_f32(result, 1, eps);

     result->op = GGML_OP_GROUP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5399,15 +5401,17 @@ static struct ggml_tensor * ggml_group_norm_impl(
 struct ggml_tensor * ggml_group_norm(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, false);
+        int                   n_groups,
+        float                 eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
 }

 struct ggml_tensor * ggml_group_norm_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   n_groups) {
-    return ggml_group_norm_impl(ctx, a, n_groups, true);
+        int                   n_groups,
+        float                 eps) {
+    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
 }

 // ggml_mul_mat
@@ -12098,10 +12102,11 @@ static void ggml_compute_forward_group_norm_f32(
     GGML_TENSOR_UNARY_OP_LOCALS

-    const float eps = 1e-6f; // TODO: make this a parameter
-
     // TODO: optimize

+    float eps;
+    memcpy(&eps, dst->op_params + 1, sizeof(float));
+
     int n_channels = src0->ne[2];
     int n_groups = dst->op_params[0];
     int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
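For reference, the role of eps in the computation itself: group normalization reduces each group of channels to zero mean and unit variance, and eps keeps the divisor away from zero when a group is nearly constant. A simplified per-group sketch of the math, not the actual ggml implementation (which also walks the tensor's strides and dimensions):

    #include <math.h>
    #include <stddef.h>

    // Normalize one group of values in place: x[i] = (x[i] - mean) / sqrt(var + eps)
    static void group_norm_ref(float * x, size_t n, float eps) {
        float sum = 0.0f;
        for (size_t i = 0; i < n; i++) sum += x[i];
        const float mean = sum / n;

        float sum2 = 0.0f;
        for (size_t i = 0; i < n; i++) {
            const float v = x[i] - mean;
            x[i] = v;
            sum2 += v * v;
        }
        const float var   = sum2 / n;
        const float scale = 1.0f / sqrtf(var + eps);

        for (size_t i = 0; i < n; i++) x[i] *= scale;
    }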