talk-llama : sync llama.cpp

2025-06-12 20:18:08 +00:00 · 2024-05-12 20:12:46 +03:00
parent fe179ae0cc
commit 3fa7d29876
7 changed files with 4613 additions and 1485 deletions
--- a/examples/talk-llama/talk-llama.cpp
+++ b/examples/talk-llama/talk-llama.cpp
@ -35,10 +35,10 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s

 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
    if (n_tokens < 0) {
        result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);