talk-llama : sync llama.cpp (#2709)

2025-06-13 04:28:07 +00:00 · 2025-01-13 08:55:48 +02:00
parent 45d3faf961
commit 35d0e02c72
36 changed files with 12960 additions and 11341 deletions
--- a/examples/talk-llama/llama-vocab.cpp
+++ b/examples/talk-llama/llama-vocab.cpp
@@ -1,5 +1,7 @@
 #include "llama-vocab.h"

+#include "llama-impl.h"
+
 #include "unicode.h"

 #include <algorithm>
@@ -16,22 +18,6 @@
 // helpers
 //

-LLAMA_ATTRIBUTE_FORMAT(1, 2)
-static std::string format(const char * fmt, ...) {
-    va_list ap;
-    va_list ap2;
-    va_start(ap, fmt);
-    va_copy(ap2, ap);
-    int size = vsnprintf(NULL, 0, fmt, ap);
-    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
-    std::vector<char> buf(size + 1);
-    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
-    GGML_ASSERT(size2 == size);
-    va_end(ap2);
-    va_end(ap);
-    return std::string(buf.data(), size);
-}
-
 struct naive_trie {
    naive_trie() : has_value(false), value(0) {
    }
@@ -396,6 +382,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                    "\\p{N}+",
                };
                break;
+            case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
+                regex_exprs = {
+                    "\\p{N}{1,3}",
+                    "[一-龥぀-ゟ゠-ヿ]+",
+                    "[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
            case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
                regex_exprs = {
                    "[\r\n]",
@@ -504,7 +497,7 @@ struct llm_tokenizer_bpe_session {

    bool append_bos(std::vector<llama_vocab::id> & output) const {
        if (vocab.tokenizer_add_bos) {
-            GGML_ASSERT(vocab.special_bos_id != -1);
+            GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
            output.push_back(vocab.special_bos_id);
            return true;
        }
@@ -513,7 +506,7 @@ struct llm_tokenizer_bpe_session {

    bool append_eos(std::vector<llama_vocab::id> & output) const {
        if (vocab.tokenizer_add_eos) {
-            GGML_ASSERT(vocab.special_eos_id != -1);
+            GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
            output.push_back(vocab.special_eos_id);
            return true;
        }
@@ -1410,7 +1403,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                        if (source == 0) {
                            buffer.erase_after(buffer.before_begin());
                        } else {
-                            buffer.erase_after(std::next(buffer.begin(), (source-1)));
+                            buffer.erase_after(std::next(buffer.begin(), (source - 1)));
                        }

                        // repeat for the right side
@@ -1424,7 +1417,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
                        if (source == 0) {
                            buffer.erase_after(buffer.before_begin());
                        } else {
-                            buffer.erase_after(std::next(buffer.begin(), (source-1)));
+                            buffer.erase_after(std::next(buffer.begin(), (source - 1)));
                        }
                        break;
                    }
@@ -1461,7 +1454,7 @@ std::vector<llama_vocab::id> llama_tokenize_internal(
                bool is_prev_special = true;  // prefix with space if first token

                if (add_special && vocab.tokenizer_add_bos) {
-                    GGML_ASSERT(vocab.special_bos_id != -1);
+                    GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_bos_id);
                    is_prev_special = true;
                }
@@ -1496,7 +1489,7 @@ std::vector<llama_vocab::id> llama_tokenize_internal(
                }

                if (add_special && vocab.tokenizer_add_eos) {
-                    GGML_ASSERT(vocab.special_eos_id != -1);
+                    GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_eos_id);
                }
            } break;
@@ -1529,7 +1522,7 @@ std::vector<llama_vocab::id> llama_tokenize_internal(
        case LLAMA_VOCAB_TYPE_WPM:
            {
                if (add_special) {
-                    GGML_ASSERT(vocab.special_cls_id != -1);
+                    GGML_ASSERT(vocab.special_cls_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_cls_id);
                }

@@ -1549,14 +1542,14 @@ std::vector<llama_vocab::id> llama_tokenize_internal(
                }

                if (add_special) {
-                    GGML_ASSERT(vocab.special_sep_id != -1);
+                    GGML_ASSERT(vocab.special_sep_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_sep_id);
                }
            } break;
        case LLAMA_VOCAB_TYPE_UGM:
            {
                if (add_special && vocab.tokenizer_add_bos) {
-                    GGML_ASSERT(vocab.special_bos_id != -1);
+                    GGML_ASSERT(vocab.special_bos_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_bos_id);
                }
                llm_tokenizer_ugm_session session(vocab);
@@ -1581,7 +1574,7 @@ std::vector<llama_vocab::id> llama_tokenize_internal(
                }

                if (add_special && vocab.tokenizer_add_eos) {
-                    GGML_ASSERT(vocab.special_eos_id != -1);
+                    GGML_ASSERT(vocab.special_eos_id != LLAMA_TOKEN_NULL);
                    output.push_back(vocab.special_eos_id);
                }
            } break;
@@ -1649,7 +1642,7 @@ llama_token_attr llama_token_get_attr_impl(const struct llama_vocab & vocab, lla
 }

 bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
-    return token != -1 && vocab.special_eog_ids.count(token) > 0;
+    return token != LLAMA_TOKEN_NULL && vocab.special_eog_ids.count(token) > 0;
 }

 bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token token) {
@@ -1657,7 +1650,7 @@ bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token t
 }

 llama_token llama_token_bos_impl(const struct llama_vocab & vocab) {
-    return vocab.special_bos_id;
+    return vocab.type != LLAMA_VOCAB_TYPE_WPM ? vocab.special_bos_id : vocab.special_cls_id;
 }

 llama_token llama_token_eos_impl(const struct llama_vocab & vocab) {
@@ -1867,6 +1860,10 @@ int32_t llama_detokenize_impl(
                         int32_t   text_len_max,
                            bool   remove_special,
                            bool   unparse_special) {
+    if (vocab.type == LLAMA_VOCAB_TYPE_NONE) {
+        return 0;
+    }
+
    GGML_ASSERT(vocab.tokenizer && "Tokenizer not initialized. Call llama_vocab::init_tokenizer() first.");

    int32_t avail = text_len_max;
@@ -1884,7 +1881,7 @@ int32_t llama_detokenize_impl(
    }

    if (remove_special && vocab.tokenizer_add_eos) {
-        if (n_tokens > 0 && tokens[n_tokens-1] == vocab.special_eos_id) {
+        if (n_tokens > 0 && tokens[n_tokens - 1] == vocab.special_eos_id) {
            n_tokens--;
        }
    }