Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-06-16 22:08:07 +00:00)
ggml : add epsilon as a parameter for group_norm (llama/8818)
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
Committed by: Georgi Gerganov
Parent: 7a96e661e4
Commit: 4160b930f1
@ -5377,6 +5377,7 @@ static struct ggml_tensor * ggml_group_norm_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_groups,
|
||||
float eps,
|
||||
bool inplace) {
|
||||
|
||||
bool is_node = false;
|
||||
@ -5387,7 +5388,8 @@ static struct ggml_tensor * ggml_group_norm_impl(
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
result->op_params[0] = n_groups;
|
||||
ggml_set_op_params_i32(result, 0, n_groups);
|
||||
ggml_set_op_params_f32(result, 1, eps);
|
||||
|
||||
result->op = GGML_OP_GROUP_NORM;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
@ -5399,15 +5401,17 @@ static struct ggml_tensor * ggml_group_norm_impl(
|
||||
struct ggml_tensor * ggml_group_norm(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_groups) {
|
||||
return ggml_group_norm_impl(ctx, a, n_groups, false);
|
||||
int n_groups,
|
||||
float eps) {
|
||||
return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_group_norm_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
int n_groups) {
|
||||
return ggml_group_norm_impl(ctx, a, n_groups, true);
|
||||
int n_groups,
|
||||
float eps) {
|
||||
return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
|
||||
}
|
||||
|
||||
// ggml_mul_mat
|
||||
@ -12098,10 +12102,11 @@ static void ggml_compute_forward_group_norm_f32(
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const float eps = 1e-6f; // TODO: make this a parameter
|
||||
|
||||
// TODO: optimize
|
||||
|
||||
float eps;
|
||||
memcpy(&eps, dst->op_params + 1, sizeof(float));
|
||||
|
||||
int n_channels = src0->ne[2];
|
||||
int n_groups = dst->op_params[0];
|
||||
int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
|
||||
|
Reference in New Issue
Block a user