talk-llama : sync llama.cpp
ggml-ci
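
All hunks below are against the vendored llama.cpp model code (llama-model.cpp under examples/talk-llama). The sync pulls in three upstream changes: classifier output labels are now read from GGUF once and exposed through the public API, the Gemma 2/3 query scale is precomputed in load_hparams as hparams.f_attention_scale, and named chat-template lookup uses the correct GGUF key.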
@@ -543,6 +543,12 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     uint32_t n_vocab = 0;
     ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
+    // for classifier models
+    ml.get_arr(LLM_KV_CLASSIFIER_OUTPUT_LABELS, classifier_labels, false);
+    if (!classifier_labels.empty()) {
+        hparams.n_cls_out = classifier_labels.size();
+    }
+
     // arch-specific KVs
     switch (arch) {
         case LLM_ARCH_LLAMA:
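The label array is optional (the trailing false in the getters means "not required"), and n_cls_out is only overridden when labels are actually present; in upstream llama.cpp the key behind LLM_KV_CLASSIFIER_OUTPUT_LABELS is the arch-prefixed <arch>.classifier.output_labels. A minimal standalone sketch of the rule, assuming the default of n_cls_out = 1 declared in llama-hparams.h:

    #include <cstdint>
    #include <string>
    #include <vector>

    // n_cls_out keeps its default unless the optional label array is present,
    // in which case the number of labels defines the classifier head size.
    uint32_t resolve_n_cls_out(const std::vector<std::string> & classifier_labels,
                               uint32_t n_cls_out_default = 1) {
        return classifier_labels.empty() ? n_cls_out_default
                                         : (uint32_t) classifier_labels.size();
    }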
@@ -686,7 +692,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS,    hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL,           hparams.causal_attn);
                 ml.get_key(LLM_KV_POOLING_TYPE,               hparams.pooling_type, false);
-                ml.get_arr_n(LLM_KV_CLASSIFIER_OUTPUT_LABELS, hparams.n_cls_out, false);
 
                 switch (hparams.n_layer) {
                     case 3:
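With the labels read once in the common section above, this per-architecture ml.get_arr_n became redundant: n_cls_out is now derived from the label array itself rather than read as a bare count.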
@@ -956,6 +961,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     case 46: type = LLM_TYPE_27B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
+
+                // ref: https://github.com/google/gemma_pytorch/blob/014acb7ac4563a5f77c76d7ff98f31b568c16508/gemma/config.py#L173
+                hparams.f_attention_scale = type == LLM_TYPE_27B
+                    ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
+                    : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
             } break;
         case LLM_ARCH_GEMMA3:
             {
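The ternary encodes Gemma's query_pre_attn_scalar convention: the 27B variant scales queries by 1/sqrt(n_embd / n_head) instead of the usual 1/sqrt(head_dim). With Gemma 2 27B's shapes from the referenced config (n_embd = 4608, 32 heads, head dim 128) the two values differ; a standalone check:

    #include <cmath>
    #include <cstdio>

    int main() {
        // Gemma 2 27B shapes, per the referenced gemma_pytorch config
        const int n_embd        = 4608;
        const int n_head        = 32;
        const int n_embd_head_k = 128;

        // 27B path: query_pre_attn_scalar = n_embd / n_head = 144
        const float scale_27b   = 1.0f / std::sqrt(float(n_embd / n_head));
        // all other Gemma types: plain 1/sqrt(head_dim)
        const float scale_other = 1.0f / std::sqrt(float(n_embd_head_k));

        std::printf("27B scale:   %f (1/sqrt(144))\n", scale_27b);   // ~0.083333
        std::printf("other scale: %f (1/sqrt(128))\n", scale_other); // ~0.088388
        return 0;
    }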
@@ -976,6 +986,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
 
+                // ref: https://github.com/google/gemma_pytorch/blob/014acb7ac4563a5f77c76d7ff98f31b568c16508/gemma/config.py#L289
                 hparams.f_attention_scale = type == LLM_TYPE_27B
                     ? 1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0)))
                     : 1.0f / std::sqrt(float(hparams.n_embd_head_k));
@@ -4356,6 +4367,15 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: ssm_d_state      = %u\n",     __func__, hparams.ssm_d_state);
         LLAMA_LOG_INFO("%s: ssm_dt_rank      = %u\n",     __func__, hparams.ssm_dt_rank);
         LLAMA_LOG_INFO("%s: ssm_dt_b_c_rms   = %d\n",     __func__, hparams.ssm_dt_b_c_rms);
+
+        if (!classifier_labels.empty()) {
+            LLAMA_LOG_INFO("%s: n_cls_out        = %u\n", __func__, hparams.n_cls_out);
+
+            size_t i = 0;
+            for (auto label : classifier_labels) {
+                LLAMA_LOG_INFO("%s: cls_label[%2zu]    = %s\n", __func__, i++, label.c_str());
+            }
+        }
     }
 
     LLAMA_LOG_INFO("%s: model type       = %s\n",     __func__, type_name().c_str());
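For a classifier model, print_info now reports the head size and every label. Illustrative output for a hypothetical two-label sentiment model (the label names below are invented), following the format strings above:

    print_info: n_cls_out        = 2
    print_info: cls_label[ 0]    = NEGATIVE
    print_info: cls_label[ 1]    = POSITIVE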
@@ -8484,14 +8504,7 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
                 cb(Kcur, "Kcur", il);
                 cb(Vcur, "Vcur", il);
 
-                // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
-                switch (model.type) {
-                    case LLM_TYPE_2B:
-                    case LLM_TYPE_9B:
-                    case LLM_TYPE_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head))); break;
-                    default: GGML_ABORT("fatal error");
-                };
-                cb(Qcur, "Qcur_scaled", il);
+                Qcur = ggml_scale(ctx0, Qcur, hparams.f_attention_scale);
 
                 cur = build_attn(inp_attn, gf,
                         model.layers[il].wo, NULL,
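The removed switch applied the same 1.0f/sqrtf(float(n_embd_head)) factor to every Gemma 2 size, which is what the referenced gemma_pytorch commit corrects for 27B. Scaling by hparams.f_attention_scale instead picks up the per-type value computed once in load_hparams (the -956 hunk above), so the graph builder no longer needs to special-case model types.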
@@ -8632,9 +8645,12 @@ struct llm_build_gemma3_iswa : public llm_graph_context {
                 cb(Kcur, "Kcur", il);
                 cb(Vcur, "Vcur", il);
 
+                // ref: https://github.com/google/gemma_pytorch/blob/014acb7ac4563a5f77c76d7ff98f31b568c16508/gemma/model.py#L315
+                Qcur = ggml_scale(ctx0, Qcur, hparams.f_attention_scale);
+
                 cur = build_attn(inp_attn, gf,
                         model.layers[il].wo, NULL,
-                        Qcur, Kcur, Vcur, nullptr, nullptr, hparams.f_attention_scale, il);
+                        Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il);
             }
 
             cur = build_norm(cur,
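Gemma 3 now follows the same pattern: the scale is folded into Q up front and build_attn receives a neutral kq_scale of 1.0f, so f_attention_scale is applied exactly once. The move is mathematically neutral because (s*Q)*K^T = s*(Q*K^T), i.e. the softmax sees identical logits. A tiny self-contained check of that identity (no ggml dependency):

    #include <cassert>
    #include <cmath>

    // scores from (scale * q) . k  vs  scale * (q . k) -- identical up to rounding
    int main() {
        const float q[4] = {0.5f, -1.0f,  2.0f, 0.25f};
        const float k[4] = {1.5f,  0.5f, -0.5f, 4.0f};
        const float scale = 1.0f / std::sqrt(4.0f);

        float dot_scaled_q = 0.0f, dot_plain = 0.0f;
        for (int i = 0; i < 4; ++i) {
            dot_scaled_q += (scale * q[i]) * k[i];
            dot_plain    += q[i] * k[i];
        }
        assert(std::fabs(dot_scaled_q - scale * dot_plain) < 1e-6f);
        return 0;
    }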
@@ -13600,6 +13616,18 @@ int32_t llama_model_n_swa(const llama_model * model) {
     return model->hparams.n_swa;
 }
 
+uint32_t llama_model_n_cls_out(const struct llama_model * model) {
+    return model->hparams.n_cls_out;
+}
+
+const char * llama_model_cls_label(const struct llama_model * model, uint32_t i) {
+    if (i < model->classifier_labels.size()) {
+        return model->classifier_labels[i].c_str();
+    }
+
+    return nullptr;
+}
+
 // deprecated
 int32_t llama_n_ctx_train(const llama_model * model) {
     return llama_model_n_ctx_train(model);
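These two functions give callers (such as this repo's talk-llama example) access to the classifier head without reaching into internal structs; llama_model_cls_label returning nullptr doubles as the out-of-range and "not a classifier" signal. A minimal usage sketch, assuming a model handle obtained elsewhere (e.g. via llama_model_load_from_file):

    #include <cstdio>
    #include "llama.h"

    void dump_classifier_head(const llama_model * model) {
        const uint32_t n_out = llama_model_n_cls_out(model);
        std::printf("classifier outputs: %u\n", n_out);

        for (uint32_t i = 0; i < n_out; ++i) {
            // nullptr for out-of-range indices or models without labels
            const char * label = llama_model_cls_label(model, i);
            std::printf("  [%u] %s\n", i, label ? label : "(unlabeled)");
        }
    }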
@@ -13760,7 +13788,7 @@ uint64_t llama_model_size(const llama_model * model) {
 }
 
 const char * llama_model_chat_template(const llama_model * model, const char * name) {
-    const auto key = name ? LLM_KV(model->arch, name)(LLM_KV_TOKENIZER_CHAT_TEMPLATE_N)
+    const auto key = name ? LLM_KV(model->arch, name)(LLM_KV_TOKENIZER_CHAT_TEMPLATE)
                           : LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE);
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
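The name is already passed to the suffix-aware LLM_KV constructor here, so the plain LLM_KV_TOKENIZER_CHAT_TEMPLATE key is what resolves to the conventional tokenizer.chat_template.<name> GGUF entry; the old _N formatting assembled a key that did not match it. Caller-side behavior is unchanged. A short usage sketch ("rag" is just an example of a named template some models ship):

    // default template, then a named variant; both return nullptr when absent
    const char * tmpl = llama_model_chat_template(model, /*name=*/nullptr);
    const char * rag  = llama_model_chat_template(model, "rag");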