GGUF: C++ refactor, backend support, misc fixes (llama/11030)

* GGUF: C++ refactor, backend support, misc fixes

remove ggml_tensor.backend

update CODEOWNERS [no ci]

remove gguf_get_data from API

revise GGUF API data types
This commit is contained in:
Johannes Gäßler 2025-01-07 18:01:58 +01:00 committed by Georgi Gerganov
parent 09fabffdf5
commit acdbe58631
6 changed files with 17 additions and 1434 deletions

View File

@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
include/ggml-metal.h
include/ggml-rpc.h
include/ggml-sycl.h
include/ggml-vulkan.h)
include/ggml-vulkan.h
include/gguf.h)
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)

View File

@ -7,6 +7,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "gguf.h"
#include <memory>
// Smart pointers for ggml types

View File

@ -241,12 +241,6 @@
// rope type identifiers
#define GGML_ROPE_TYPE_MROPE 8
#define GGML_ROPE_TYPE_VISION 24
// GGUF constants: magic string, format version number and default alignment
// NOTE(review): presumably the magic is the leading bytes of a GGUF file and the alignment is in bytes - confirm
#define GGUF_MAGIC "GGUF"
#define GGUF_VERSION 3
#define GGUF_DEFAULT_ALIGNMENT 32
// evaluates x and discards the result by casting it to void
#define GGML_UNUSED(x) (void)(x)
// rounds x up to the next multiple of n
// the mask ~((n) - 1) only yields a multiple of n when n is a power of two
// both arguments are expanded more than once, so avoid arguments with side effects
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@ -403,12 +397,6 @@ extern "C" {
GGML_PREC_F32,
};
// type of the `backend` member of struct ggml_tensor
// that member is marked deprecated with the message
// "use the buffer type to find the storage location of the tensor"
// the enumerators carry explicit, non-contiguous values (0, 10, 20)
enum ggml_backend_type {
GGML_BACKEND_TYPE_CPU = 0,
GGML_BACKEND_TYPE_GPU = 10,
GGML_BACKEND_TYPE_GPU_SPLIT = 20,
};
// model file types
enum ggml_ftype {
GGML_FTYPE_UNKNOWN = -1,
@ -587,8 +575,6 @@ extern "C" {
struct ggml_tensor {
enum ggml_type type;
GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
struct ggml_backend_buffer * buffer;
int64_t ne[GGML_MAX_DIMS]; // number of elements
@ -2111,132 +2097,6 @@ extern "C" {
int64_t n_per_row,
const float * imatrix);
//
// gguf
//
// type tag for GGUF values; this is the return type of gguf_get_kv_type() and gguf_get_arr_type()
// and the element type argument of gguf_set_arr_data()
// every real type has an explicit number
// NOTE(review): presumably these numbers are what gets stored in GGUF files, so they must not be renumbered - confirm
enum gguf_type {
GGUF_TYPE_UINT8 = 0,
GGUF_TYPE_INT8 = 1,
GGUF_TYPE_UINT16 = 2,
GGUF_TYPE_INT16 = 3,
GGUF_TYPE_UINT32 = 4,
GGUF_TYPE_INT32 = 5,
GGUF_TYPE_FLOAT32 = 6,
GGUF_TYPE_BOOL = 7,
GGUF_TYPE_STRING = 8,
GGUF_TYPE_ARRAY = 9,
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_COUNT, // marks the end of the enum
};
struct gguf_context;
// options passed by value to gguf_init_from_file()
struct gguf_init_params {
// NOTE(review): presumably disables allocation of the tensor data - exact semantics are not visible here, confirm
bool no_alloc;
// if not NULL, create a ggml_context and allocate the tensor data in it
struct ggml_context ** ctx;
};
// context lifetime: both init functions return a gguf_context pointer, gguf_free takes one
// NOTE(review): what gguf_init_from_file returns on failure is not visible here - confirm before dereferencing
GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
GGML_API void gguf_free(struct gguf_context * ctx);
// returns a string for the given gguf_type value
GGML_API const char * gguf_type_name(enum gguf_type type);
// read-only properties of a context
GGML_API int gguf_get_version (const struct gguf_context * ctx);
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
// key-value pairs are addressed by an integer key_id
// NOTE(review): the value gguf_find_key returns for a missing key is not visible here - confirm
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
// type of the value stored for key_id, and a second type query for array-valued keys
// NOTE(review): gguf_get_arr_type presumably reports the element type of the array - confirm
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
// typed getters for the value stored under key_id, one per value type
// will abort if the wrong type is used for the key
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
// the pointer-returning getters hand out const pointers
// NOTE(review): presumably these point into memory owned by ctx and must not be freed by the caller - confirm
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
// array-valued keys: a count, an untyped data pointer, and a string selected by index i
// NOTE(review): presumably gguf_get_arr_n is the element count and i indexes into a string array - confirm
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
// tensor info queries: tensors are looked up by name or addressed by an integer index i
// NOTE(review): the value gguf_find_tensor returns for an unknown name is not visible here - confirm
// NOTE(review): what gguf_get_tensor_offset is relative to is not visible here - confirm
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
// note: returns a non-const char * even though ctx is const
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
// removes key if it exists
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
// typed setters, one per value type; keys are given by name rather than by key_id
// overrides existing values or adds a new one
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
// array setters
// NOTE(review): presumably `type` is the element type and `n` the number of elements in `data` - confirm
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
// manage tensor info
// gguf_set_tensor_type and gguf_set_tensor_data identify the tensor by name
// NOTE(review): presumably the tensor must have been added with gguf_add_tensor first - confirm
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
// writing gguf files can be done in 2 ways:
//
// - write the entire gguf_context to a binary file in a single pass:
//
// gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
//
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
//
// FILE * f = fopen(fname, "wb");
// const size_t meta_size = gguf_get_meta_size(ctx);
// fseek(f, meta_size, SEEK_SET);
// fwrite(tensor_data, 1, tensor_data_size, f); // tensor data goes after the placeholder
// void * data = malloc(meta_size);
// gguf_get_meta_data(ctx, data);
// fseek(f, 0, SEEK_SET);
// fwrite(data, 1, meta_size, f);
// free(data);
// fclose(f);
//
// write the entire context to a binary file
// NOTE(review): presumably only_meta = true skips the tensor data - confirm
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
// returns nothing; takes a caller-provided pointer
// NOTE(review): presumably writes the meta data into `data`, which must hold gguf_get_meta_size(ctx) bytes - confirm
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
#ifdef __cplusplus
// restrict not standard in C++
# if defined(__GNUC__)

View File

@ -208,6 +208,7 @@ add_library(ggml-base
../include/ggml-backend.h
../include/ggml-cpp.h
../include/ggml-opt.h
../include/gguf.h
ggml.c
ggml-alloc.c
ggml-backend.cpp
@ -215,7 +216,8 @@ add_library(ggml-base
ggml-threading.cpp
ggml-threading.h
ggml-quants.c
ggml-quants.h)
ggml-quants.h
gguf.cpp)
target_include_directories(ggml-base PRIVATE .)

View File

@ -3,6 +3,8 @@
// GGML internal header
#include "ggml.h"
#include "gguf.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
@ -551,22 +553,15 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
// variant of gguf_init_from_file that takes an already opened FILE * instead of a file name
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
// untyped buffer described by a data pointer, a size and an offset
// NOTE(review): presumably `size` is the capacity in bytes and `offset` the current write position - confirm
struct gguf_buf {
void * data;
size_t size;
size_t offset;
};
// gguf_buf is passed and returned by value in init/free, and by pointer in gguf_write_to_buf
GGML_API struct gguf_buf gguf_buf_init(size_t size);
GGML_API void gguf_buf_free(struct gguf_buf buf);
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);
#ifdef __cplusplus
}
#endif
// the declarations below are only visible to C++ translation units
// gguf_write_to_buf takes a std::vector reference, which cannot be declared in C
#ifdef __cplusplus
#include <vector>
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
// variant of gguf_init_from_file that takes an already opened FILE * instead of a file name
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
// the output buffer is a caller-owned std::vector<int8_t> passed by non-const reference
// NOTE(review): whether existing contents of buf are kept or replaced is not visible here - confirm
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
#endif // __cplusplus

File diff suppressed because it is too large Load Diff