mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-14 12:58:10 +00:00
@ -445,7 +445,8 @@ llama_model_loader::llama_model_loader(
|
||||
std::vector<std::string> & splits,
|
||||
bool use_mmap,
|
||||
bool check_tensors,
|
||||
const struct llama_model_kv_override * param_overrides_p) {
|
||||
const llama_model_kv_override * param_overrides_p,
|
||||
const llama_model_tensor_buft_override * param_tensor_buft_overrides_p) {
|
||||
int trace = 0;
|
||||
if (getenv("LLAMA_TRACE")) {
|
||||
trace = atoi(getenv("LLAMA_TRACE"));
|
||||
@ -457,6 +458,8 @@ llama_model_loader::llama_model_loader(
|
||||
}
|
||||
}
|
||||
|
||||
tensor_buft_overrides = param_tensor_buft_overrides_p;
|
||||
|
||||
// Load the main GGUF
|
||||
struct ggml_context * ctx = NULL;
|
||||
struct gguf_init_params params = {
|
||||
@ -600,7 +603,9 @@ llama_model_loader::llama_model_loader(
|
||||
|
||||
if (trace > 0) {
|
||||
const uint16_t sid = w.idx;
|
||||
LLAMA_LOG_INFO("%s: - tensor split %2d: %32s %-8s [ %s ]\n", __func__, sid, ggml_get_name(tensor), ggml_type_name(type), llama_format_tensor_shape(tensor).c_str());
|
||||
LLAMA_LOG_INFO("%s: - tensor split %2d: %32s %-8s [ %s ] %8.2f MiB\n", __func__,
|
||||
sid, ggml_get_name(tensor), ggml_type_name(type), llama_format_tensor_shape(tensor).c_str(),
|
||||
ggml_nbytes(tensor)/1024.0f/1024.0f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -640,9 +645,9 @@ llama_model_loader::llama_model_loader(
|
||||
ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED);
|
||||
|
||||
{
|
||||
const int kid = gguf_find_key(meta.get(), "general.file_type"); // TODO: use LLM_KV
|
||||
if (kid >= 0) {
|
||||
ftype = (llama_ftype) gguf_get_val_u32(meta.get(), kid);
|
||||
uint32_t ftype_val = 0;
|
||||
if (get_key(LLM_KV_GENERAL_FILE_TYPE, ftype_val, false)) {
|
||||
ftype = (llama_ftype) ftype_val;
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user