whisper : adapt to latest ggml (skip) (#0)

This commit is contained in:
Georgi Gerganov
2024-10-05 13:14:03 +03:00
parent 0b1b094a67
commit 941912467d
13 changed files with 368 additions and 2585 deletions

View File

@ -7,7 +7,7 @@
#include <unordered_map>
#include <unordered_set>
const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
const std::initializer_list<std::pair<uint32_t, uint16_t>> unicode_ranges_flags = { // start, flags // last=next_start-1
{0x000000, 0x0080},
{0x000020, 0x0008},
{0x000021, 0x0020},
@ -2311,7 +2311,8 @@ const std::unordered_set<uint32_t> unicode_set_whitespace = {
0x003000,
};
const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
// list is always in ascending order, to enable binary search
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_lowercase = {
{0x000041, 0x000061},
{0x000042, 0x000062},
{0x000043, 0x000063},
@ -3747,7 +3748,8 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_lowercase = {
{0x01E921, 0x01E943},
};
const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
// list is always in ascending order, to enable binary search
const std::initializer_list<std::pair<uint32_t, uint32_t>> unicode_map_uppercase = {
{0x000061, 0x000041},
{0x000062, 0x000042},
{0x000063, 0x000043},
@ -5200,7 +5202,7 @@ const std::unordered_map<uint32_t, uint32_t> unicode_map_uppercase = {
{0x01E943, 0x01E921},
};
const std::vector<range_nfd> unicode_ranges_nfd = { // start, last, nfd
const std::initializer_list<range_nfd> unicode_ranges_nfd = { // start, last, nfd
{0x000000, 0x000000, 0x000000},
{0x0000C0, 0x0000C5, 0x000041},
{0x0000C7, 0x0000C7, 0x000043},