mirror of
synced 2025-03-02 19:58:48 +00:00
Some checks failed
Bindings Tests (Ruby) / ubuntu-latest (push) Has been cancelled
CI / ubuntu-latest (linux/amd64) (push) Has been cancelled
CI / ubuntu-latest (linux/ppc64le) (push) Has been cancelled
CI / ubuntu-latest-arm64 (linux/arm64) (push) Has been cancelled
CI / ubuntu-latest-arm-v7 (linux/arm/v7) (push) Has been cancelled
CI / macOS-latest (push) Has been cancelled
CI / ubuntu-latest-gcc (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-latest-gcc (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-latest-gcc (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-latest-gcc (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-latest-gcc-arm64 (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-latest-gcc-arm64 (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-latest-gcc-arm-v7 (linux/arm/v7, Debug) (push) Has been cancelled
CI / ubuntu-latest-gcc-arm-v7 (linux/arm/v7, Release) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/amd64, Debug) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/amd64, Release) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/arm64, Debug) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/arm64, Release) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/ppc64le, Debug) (push) Has been cancelled
CI / ubuntu-latest-clang (linux/ppc64le, Release) (push) Has been cancelled
CI / ubuntu-latest-gcc-sanitized (linux/amd64, ADDRESS) (push) Has been cancelled
CI / ubuntu-latest-gcc-sanitized (linux/amd64, THREAD) (push) Has been cancelled
CI / ubuntu-latest-gcc-sanitized (linux/amd64, UNDEFINED) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/amd64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm/v7, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/arm64, icx, icpx, ON) (push) Has been cancelled
CI / ubuntu-22-cmake-sycl-fp16 (linux/ppc64le, icx, icpx, ON) (push) Has been cancelled
CI / windows-msys2 (Release, clang-x86_64, CLANG64) (push) Has been cancelled
CI / windows-msys2 (Release, ucrt-x86_64, UCRT64) (push) Has been cancelled
CI / windows (Win32, Release, win32-x86, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows (x64, Release, win32-x86-64, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (Win32, ON, Release, x86, 2.28.5, ON) (push) Has been cancelled
CI / windows-blas (x64, ON, Release, x64, 2.28.5, ON) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 11.8.0, ON, 2.28.5) (push) Has been cancelled
CI / windows-cublas (x64, Release, ON, 12.2.0, ON, 2.28.5) (push) Has been cancelled
CI / emscripten (Release) (push) Has been cancelled
CI / ios-xcode-build (Release) (push) Has been cancelled
CI / android (push) Has been cancelled
CI / quantize (push) Has been cancelled
Publish Docker image / Push Docker image to Docker Hub (map[dockerfile:.devops/main.Dockerfile platform:linux/amd64 tag:main]) (push) Has been cancelled
163 lines
5.5 KiB
163 lines
5.5 KiB
#pragma once
#include "llama.h"
#include "llama-impl.h"
#include "llama-arch.h"
#include "llama-mmap.h"
#include "ggml-cpp.h"
#include <cstddef>
#include <map>
#include <stdexcept>
#include <unordered_map>
// Lookup table from a 32-bit unsigned key to a ggml backend buffer handle.
// NOTE(review): what the key identifies is not visible in this header — confirm at the call sites.
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
enum llama_fver {
// Returns a C string for the given llama_fver value (presumably a human-readable
// name of the file format version — TODO confirm against the definition).
const char * llama_file_version_name(llama_fver version);
struct llama_model_loader {
// Holds information on a model weight: the index of the source file it comes
// from, the offset of its data in that file, and the ggml tensor describing it.
struct llama_tensor_weight {
    uint16_t idx; // source file index
    size_t offs; // tensor data offset in the original file
    ggml_tensor * tensor;

    // Looks the tensor up by name in gguf_ctx and records where its data starts
    // (data section offset + per-tensor offset).
    //
    // Throws std::runtime_error when
    //   - gguf_find_tensor() returns a negative index for the tensor's name, or
    //   - the byte range [offs, offs + ggml_nbytes(tensor)) wraps around or
    //     extends past file->size().
    llama_tensor_weight(const llama_file * file, uint16_t idx, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
        const int tensor_idx = gguf_find_tensor(gguf_ctx, ggml_get_name(tensor));
        if (tensor_idx < 0) {
            throw std::runtime_error(format("tensor '%s' not found in the model", ggml_get_name(tensor)));
        }

        offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);

        // `offs + nbytes < offs` detects unsigned wrap-around of the end offset;
        // the second test rejects data that would run past the end of the file.
        const size_t nbytes = ggml_nbytes(tensor);
        if (offs + nbytes < offs || offs + nbytes > file->size()) {
            throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", ggml_get_name(tensor)));
        }
    }
};
// custom comparator to sort weights more nicely by layer
//
// Names are ordered by the integer parsed from a leading "blk.<N>." prefix
// first, and lexicographically second. Comparing the layer numerically keeps
// "blk.2." ahead of "blk.10.", which a plain string comparison would not.
struct weight_name_comparer {
    bool operator()(const std::string & a, const std::string & b) const {
        // -1 is kept when the name does not start with "blk.<N>", so names
        // without a layer prefix sort ahead of layer 0 and up.
        int a_layer = -1;
        int b_layer = -1;
        sscanf(a.c_str(), "blk.%d.", &a_layer);
        sscanf(b.c_str(), "blk.%d.", &b_layer);
        if (a_layer != b_layer) {
            return a_layer < b_layer;
        }
        // same layer (or no layer on either side): fall back to string order
        return a < b;
    }
};
// Flag values (1 and 2, so they can be OR-ed together).
// NOTE(review): presumably intended for the `flags` argument of create_tensor() — confirm.
static const int TENSOR_NOT_REQUIRED = 1;
static const int TENSOR_DUPLICATED = 2;
// Counters; all start at zero.
// NOTE(review): presumably filled in while the model file(s) are parsed and tensors are created — confirm.
int n_kv = 0;
int n_tensors = 0;
int n_created = 0;
uint64_t n_elements = 0;
size_t n_bytes = 0;
// Loading options. use_mmap defaults to false; check_tensors has no default
// initializer here, so it must be set before it is read.
bool use_mmap = false;
bool check_tensors;
llama_files files;
// ftype and fver also have no default initializer in this header.
llama_ftype ftype;
llama_fver fver;
llama_mmaps mappings;
// Tensor name -> weight record, kept ordered by weight_name_comparer.
std::map<std::string, struct llama_tensor_weight, weight_name_comparer> weights_map;
// Key/value overrides, looked up by string key.
std::unordered_map<std::string, struct llama_model_kv_override> kv_overrides;
gguf_context_ptr meta;
std::vector<ggml_context_ptr> contexts;
std::string arch_name;
// Byte counters, both starting at zero.
// NOTE(review): presumably used for progress reporting during load_all_data() — confirm.
size_t size_done = 0;
size_t size_data = 0;
// Pairs of size_t values.
// NOTE(review): presumably the used [first, last) byte range of each mapping — confirm.
std::vector<std::pair<size_t, size_t>> mmaps_used;
// Constructs a loader for the model file named by fname.
//   fname             - path/name of the model file
//   use_mmap          - NOTE(review): presumably requests memory-mapped access to the file — confirm
//   check_tensors     - NOTE(review): presumably enables validation of tensor data — confirm
//   param_overrides_p - pointer to llama_model_kv_override entries; how the list is
//                       terminated and whether it may be null is not visible here — confirm
llama_model_loader(const std::string & fname, bool use_mmap, bool check_tensors, const struct llama_model_kv_override * param_overrides_p);
// Metadata accessors. Every accessor writes into `result`, returns bool, and takes
// `required` (default true). Each comes in two forms: one keyed by a string and
// one keyed by an llm_kv enumerator.
// NOTE(review): presumably the return value reports whether the key was found, and a
// missing key with required == true raises an error — confirm against the definitions.

// get_arr_n: only participates in overload resolution for integral T (enable_if).
// NOTE(review): presumably stores the element count of an array-valued key — confirm.
template<typename T>
typename std::enable_if<std::is_integral<T>::value, bool>::type
get_arr_n(const std::string & key, T & result, bool required = true);
template<typename T>
typename std::enable_if<std::is_integral<T>::value, bool>::type
get_arr_n(enum llm_kv kid, T & result, bool required = true);
// get_arr: overloads for a std::vector<T> and for a fixed-capacity std::array<T, N_MAX>.
template<typename T>
bool get_arr(const std::string & key, std::vector<T> & result, bool required = true);
template<typename T, size_t N_MAX>
bool get_arr(const std::string & key, std::array<T, N_MAX> & result, bool required = true);
template<typename T>
bool get_arr(enum llm_kv kid, T & result, bool required = true);
// get_key: accessor for a single value of type T.
template<typename T>
bool get_key(const std::string & key, T & result, bool required = true);
template<typename T>
bool get_key(enum llm_kv kid, T & result, bool required = true);
// get_key_or_arr: takes an additional count `n`.
// NOTE(review): presumably accepts either a scalar or an array for the key and fills
// `n` entries of `result` — confirm.
template<typename T, size_t N_MAX>
bool get_key_or_arr(const std::string & key, std::array<T, N_MAX> & result, uint32_t n, bool required = true);
template<typename T>
bool get_key_or_arr(enum llm_kv kid, T & result, uint32_t n, bool required = true);
// Architecture accessors: one returns a string, the other an llm_arch enumerator.
std::string get_arch_name() const;
enum llm_arch get_arch() const;
// Weight lookup by tensor name. get_weight returns a pointer, require_weight a reference.
// NOTE(review): presumably get_weight yields nullptr for an unknown name while
// require_weight fails loudly — confirm against the definitions.
const llama_tensor_weight * get_weight(const char * name) const;
const llama_tensor_weight & require_weight(const char * name) const;
// Tensor metadata lookup by name; same get/require pairing as above.
struct ggml_tensor * get_tensor_meta(const char * name) const;
struct ggml_tensor * require_tensor_meta(const std::string & name) const;
// Takes a tensor name, a list of dimensions `ne` and a `required` flag; returns a
// pointer to a const tensor.
// NOTE(review): presumably verifies that the named tensor has shape `ne` — confirm.
const struct ggml_tensor * check_tensor_dims(const std::string & name, const std::vector<int64_t> & ne, bool required) const;
// Creates a tensor called `name` with dimensions `ne` in `ctx`. `flags` defaults to 0.
// NOTE(review): presumably `flags` is a combination of the TENSOR_* constants — confirm.
struct ggml_tensor * create_tensor(struct ggml_context * ctx, const std::string & name, const std::initializer_list<int64_t> & ne, int flags = 0);
// Same, but takes an existing `base` tensor and a byte `offset`.
// NOTE(review): presumably the result is a view into `base` starting at `offset` — confirm.
struct ggml_tensor * create_tensor_as_view(struct ggml_context * ctx, struct ggml_tensor * base, const std::string & name, const std::initializer_list<int64_t> & ne, size_t offset, bool required = true);
// NOTE(review): presumably a consistency check to call once all tensors have been created — confirm.
void done_getting_tensors() const;
// Sets up the file mappings. `prefetch` defaults to true; `mlock_mmaps` is optional
// and defaults to nullptr.
void init_mappings(bool prefetch = true, llama_mlocks * mlock_mmaps = nullptr);
// Writes results through the out-parameters `first`, `last` and `addr` for the
// mapping with index `idx` and the given context.
// NOTE(review): presumably the byte range of mapping `idx` that is used by tensors of `ctx` — confirm.
void get_mapping_range(size_t * first, size_t * last, void ** addr, int idx, ggml_context * ctx) const;
// for backwards compatibility, does not support ggml-backend
void load_data_for(struct ggml_tensor * cur) const;
// Returns false if cancelled by progress_callback
// progress_callback_user_data is an opaque pointer supplied alongside the callback.
bool load_all_data(
struct ggml_context * ctx,
llama_buf_map & bufs,
llama_mlocks * lmlocks,
llama_progress_callback progress_callback,
void * progress_callback_user_data);
// Returns a string for the file type (presumably derived from `ftype` — TODO confirm).
std::string ftype_name() const;
// NOTE(review): presumably logs a summary of the loaded model file — confirm.
void print_info() const;