mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-15 05:18:07 +00:00
sync : ggml (#2001)
* sync : update scripts * sync : ggml * talk-llama : sync llama.cpp * make : WHISPER_CUBLAS -> WHISPER_CUDA * ci : try to fix sycl build * talk-llama : fix make build
This commit is contained in:
@ -377,6 +377,27 @@ typedef struct {
|
||||
} block_iq1_s;
|
||||
static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
||||
|
||||
// 1.75 bpw
|
||||
typedef struct {
|
||||
uint8_t qs[QK_K/8]; // grid index, low 8 bits
|
||||
uint8_t qh[QK_K/16]; // grid index, high 3 bits + grid shift bit (for two groups of 8)
|
||||
#if QK_K == 64
|
||||
ggml_half d;
|
||||
#endif
|
||||
uint8_t scales[QK_K/32]; // 3-bit block scales (4-bit if QK_K == 64)
|
||||
} block_iq1_m;
|
||||
#if QK_K == 64
|
||||
static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32 + sizeof(ggml_half), "wrong iq1_m block size/padding");
|
||||
#else
|
||||
static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32, "wrong iq1_m block size/padding");
|
||||
#endif
|
||||
|
||||
// Used by IQ1_M quants
|
||||
typedef union {
|
||||
ggml_half f16;
|
||||
uint16_t u16;
|
||||
} iq1m_scale_t;
|
||||
|
||||
// Non-linear quants
|
||||
#define QK4_NL 32
|
||||
typedef struct {
|
||||
@ -1050,6 +1071,7 @@ GGML_TABLE_END()
|
||||
|
||||
#define NGRID_IQ1S 2048
|
||||
#define IQ1S_DELTA 0.125f
|
||||
#define IQ1M_DELTA 0.125f
|
||||
#if defined(GGML_COMMON_IMPL_C)
|
||||
GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S)
|
||||
0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff,
|
||||
|
Reference in New Issue
Block a user