ggml : sync (ggml-alloc, GPU, eps, etc.) (#1220)

* ggml : sync (ggml-alloc, GPU, eps, etc.)
* ggml : fix build
* wasm : fix build
2025-06-14 12:58:10 +00:00 · 2023-09-05 13:54:40 +03:00
parent 6780c98e19
commit 59a3d0cb57
17 changed files with 11254 additions and 4095 deletions
--- a/examples/talk/gpt-2.cpp
+++ b/examples/talk/gpt-2.cpp
@@ -379,6 +379,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 //   - embd_inp:  the embeddings of the tokens in the context
 //   - embd_w:    the predicted logits for the next token
 //
+// TODO: sync latest version from ggml repo
 bool gpt2_eval(
        const gpt2_model & model,
        const int n_threads,
@@ -420,7 +421,6 @@ bool gpt2_eval(

    struct ggml_context * ctx0 = ggml_init(params);
    struct ggml_cgraph gf = {};
-    gf.n_threads = n_threads;

    struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
    memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -442,7 +442,7 @@ bool gpt2_eval(
        // norm
        {
            // [ 768, N]
-            cur = ggml_norm(ctx0, inpL);
+            cur = ggml_norm(ctx0, inpL, 1e-5f);

            // cur = ln_1_g*cur + ln_1_b
            // [ 768, N]
@@ -589,7 +589,7 @@ bool gpt2_eval(
        {
            // norm
            {
-                cur = ggml_norm(ctx0, inpFF);
+                cur = ggml_norm(ctx0, inpFF, 1e-5f);

                // cur = ln_2_g*cur + ln_2_b
                // [ 768, N]
@@ -644,7 +644,7 @@ bool gpt2_eval(
    // norm
    {
        // [ 768, N]
-        inpL = ggml_norm(ctx0, inpL);
+        inpL = ggml_norm(ctx0, inpL, 1e-5f);

        // inpL = ln_f_g*inpL + ln_f_b
        // [ 768, N]
@@ -664,8 +664,8 @@ bool gpt2_eval(
    //inpL = ggml_soft_max(ctx0, inpL);

    // run the computation
-    ggml_build_forward_expand(&gf, inpL);
-    ggml_graph_compute       (ctx0, &gf);
+    ggml_build_forward_expand  (&gf, inpL);
+    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

    //if (n_past%100 == 0) {
    //    ggml_graph_print   (&gf);