tests: add gradient tests for all backends (ggml/932)

* tests: add gradient checking to test-backend-ops

* remove old comment

* reorder includes

* adjust SIN/COS parameters

* add documentation, use supports_op if possible
This commit is contained in:
Johannes Gäßler
2024-09-03 17:21:46 +02:00
committed by Georgi Gerganov
parent 34972dbe22
commit 5d6dc19f04
9 changed files with 118 additions and 24 deletions

View File

@@ -5131,6 +5131,7 @@ struct ggml_tensor * ggml_concat(
bool is_node = false;
if (a->grad || b->grad) {
GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
@@ -5252,6 +5253,7 @@ struct ggml_tensor * ggml_leaky_relu(
bool is_node = false;
if (!inplace && (a->grad)) {
GGML_ABORT("fatal error"); // TODO: not implemented
is_node = true;
}
@@ -5677,6 +5679,7 @@ static struct ggml_tensor * ggml_set_impl(
// make a view of the destination
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
GGML_ASSERT(offset < (size_t)(1 << 30));
int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
ggml_set_op_params(result, params, sizeof(params));
@@ -6634,14 +6637,12 @@ struct ggml_tensor * ggml_rope_back(
GGML_ASSERT(ggml_is_vector(b));
GGML_ASSERT(b->type == GGML_TYPE_I32);
GGML_ASSERT(a->ne[2] == b->ne[0]);
GGML_ASSERT(c == NULL && "freq factors not implemented yet");
GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
bool is_node = false;
if (a->grad) {
is_node = false; // TODO: implement backward
GGML_ASSERT(false && "backwards pass not implemented");
is_node = false;
}
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
@@ -6659,6 +6660,7 @@ struct ggml_tensor * ggml_rope_back(
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
result->src[2] = c;
return result;
}
@@ -7212,6 +7214,11 @@ struct ggml_tensor * ggml_argsort(
enum ggml_sort_order order) {
bool is_node = false;
if (a->grad) {
GGML_ABORT("fatal error"); // TODO: not implemented
is_node = true;
}
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
ggml_set_op_params_i32(result, 0, (int32_t) order);
@@ -10745,9 +10752,6 @@ static void ggml_compute_forward_sum_f32(
return;
}
assert(ggml_is_scalar(dst));
assert(ggml_is_scalar(dst));
assert(src0->nb[0] == sizeof(float));
@@ -18000,14 +18004,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
if (src0->grad || src1->grad) {
GGML_ASSERT(src0->type == tensor->type);
GGML_ASSERT(tensor->grad->type == tensor->type);
GGML_ASSERT(tensor->grad->type == src1->grad->type);
GGML_ASSERT(!src1->grad || src1->grad->type == tensor->grad->type);
tensor_grad_view = ggml_view_4d(ctx,
tensor->grad,
src1->grad->ne[0],
src1->grad->ne[1],
src1->grad->ne[2],
src1->grad->ne[3],
tensor->grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
nb1, nb2, nb3, offset);
}
@@ -18076,9 +18076,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
memcpy(&offset, tensor->op_params, sizeof(offset));
size_t nb1 = tensor->nb[1];
size_t nb2 = tensor->nb[2];
size_t nb3 = tensor->nb[3];
size_t nb1 = tensor->nb[1];
size_t nb2 = tensor->nb[2];
size_t nb3 = tensor->nb[3];
if (src0->type != src0->grad->type) {
// gradient is typically F32, but src0 could be other type