// whisper.cpp/examples/talk-llama/unicode-data.h
//
// 21 lines
// 630 B
// C
// Raw Normal View History

#pragma once
#include <cstdint>
#include <vector>
// 2024-06-16 10:10:54 +00:00
#include <unordered_map>
#include <unordered_set>
// 2024-06-16 10:10:54 +00:00
// Element type of the unicode_ranges_nfd table: three plain 32-bit values.
// Kept as a simple aggregate (no constructors) so entries can be written as
// brace-initialized triples.
// NOTE(review): presumably [first, last] is a code point range and nfd is the
// value associated with that range (the name suggests Unicode NFD) — confirm
// against the table definition and its users.
struct range_nfd {
    uint32_t first;  // presumably the start of the range — TODO confirm inclusivity
    uint32_t last;   // presumably the end of the range — TODO confirm inclusivity
    uint32_t nfd;    // presumably the NFD value for the range — TODO confirm
};
// 0x110000 == 0x10FFFF + 1: one more than the highest Unicode code point
// (U+10FFFF), i.e. the number of distinct code point values.
// NOTE(review): users presumably treat this as an exclusive upper bound or a
// table size — confirm at the call sites.
static constexpr uint32_t MAX_CODEPOINTS = 0x110000;
// Declaration only (extern, no initializer): the table itself is defined in
// another translation unit. Each entry pairs a 32-bit value with a 16-bit value.
// NOTE(review): presumably (first code point of a range, category flag bits) —
// confirm against the definition.
extern const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
// 2024-06-16 10:10:54 +00:00
// The four tables below are declarations only (extern, no initializer); their
// definitions live in another translation unit.

// Hash set of 32-bit values.
// NOTE(review): presumably the code points classified as whitespace — confirm.
extern const std::unordered_set<uint32_t> unicode_set_whitespace;

// Despite the "map" in their names, these two are flat lists of 32-bit pairs,
// not associative containers, so there is no keyed lookup on the type itself.
// NOTE(review): presumably (code point, lowercase/uppercase counterpart); whether
// the entries are sorted by the first element is not visible here — confirm
// before searching them with anything other than a linear scan.
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase;
extern const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase;

// List of range_nfd records (three 32-bit fields each: first, last, nfd).
extern const std::initializer_list<range_nfd> unicode_ranges_nfd;