models : fix HF fine-tuned model conversion script (#157)

It works now
author Georgi Gerganov 2022-11-23 23:14:11 +02:00
parent 388e9f79ad
commit 5698bddbc9


@@ -9,27 +9,28 @@ import numpy as np
 from transformers import WhisperForConditionalGeneration

-conv_map = {'self_attn_layer_norm': 'attn_ln',
-        'encoder_attn.k_proj': 'attn.key',
-        'self_attn.out_proj': 'attn.out',
-        'encoder_attn.out_proj': 'cross_attn.out',
-        'self_attn.q_proj': 'attn.query',
-        'encoder_attn.q_proj': 'cross_attn.query',
-        'self_attn.v_proj': 'attn.value',
-        'encoder_attn.v_proj': 'cross_attn.value',
-        'encoder_attn_layer_norm': 'cross_attn_ln',
-        'fc1': 'mlp.0',
-        'fc2': 'mlp.2',
-        'final_layer_norm': 'mlp_ln',
-        'encoder.layer_norm.bias': 'encoder.ln_post.bias',
-        'encoder.layer_norm.weight': 'encoder.ln_post.weight',
-        'encoder.embed_positions.weight': 'encoder.positional_embedding',
-        'decoder.layer_norm.bias': 'decoder.ln.bias',
-        'decoder.layer_norm.weight': 'decoder.ln.weight',
-        'decoder.embed_positions.weight': 'decoder.positional_embedding',
-        'decoder.embed_tokens.weight': 'decoder.token_embedding.weight',
-        'proj_out.weight': 'decoder.proj.weight',
-        }
+conv_map = {
+        'self_attn.k_proj'              : 'attn.key',
+        'self_attn.q_proj'              : 'attn.query',
+        'self_attn.v_proj'              : 'attn.value',
+        'self_attn.out_proj'            : 'attn.out',
+        'self_attn_layer_norm'          : 'attn_ln',
+        'encoder_attn.q_proj'           : 'cross_attn.query',
+        'encoder_attn.v_proj'           : 'cross_attn.value',
+        'encoder_attn.out_proj'         : 'cross_attn.out',
+        'encoder_attn_layer_norm'       : 'cross_attn_ln',
+        'fc1'                           : 'mlp.0',
+        'fc2'                           : 'mlp.2',
+        'final_layer_norm'              : 'mlp_ln',
+        'encoder.layer_norm.bias'       : 'encoder.ln_post.bias',
+        'encoder.layer_norm.weight'     : 'encoder.ln_post.weight',
+        'encoder.embed_positions.weight': 'encoder.positional_embedding',
+        'decoder.layer_norm.bias'       : 'decoder.ln.bias',
+        'decoder.layer_norm.weight'     : 'decoder.ln.weight',
+        'decoder.embed_positions.weight': 'decoder.positional_embedding',
+        'decoder.embed_tokens.weight'   : 'decoder.token_embedding.weight',
+        'proj_out.weight'               : 'decoder.proj.weight',
+        }

 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
 def bytes_to_unicode():
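For context, the mapping is applied per tensor: the HF parameter name is split on '.', the leading "model." prefix and the layer index are peeled off, the inner "self_attn.q_proj"-style segment is looked up in conv_map, and the pieces are re-joined into the whisper.cpp name. A minimal sketch of that path, assuming a typical fine-tuned checkpoint tensor name (the example name and the rename helper are illustrative, not part of the script):

# Illustrative sketch only; the real conv_map is the full dictionary above.
conv_map = {'self_attn.q_proj': 'attn.query'}   # trimmed to the entry used here

def rename(hf_name):
    nn = hf_name.split(".")[1:]            # drop the leading "model."
    nn[1] = "blocks"                       # HF "layers" -> whisper.cpp "blocks"
    mapped = conv_map[".".join(nn[3:-1])]  # "self_attn.q_proj" -> "attn.query"
    return ".".join([nn[0], nn[1], nn[2], mapped, nn[-1]])

print(rename("model.decoder.layers.3.self_attn.q_proj.weight"))
# -> decoder.blocks.3.attn.query.weight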
@@ -95,12 +96,12 @@ fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
 fout.write(struct.pack("i", hparams["vocab_size"]))
 fout.write(struct.pack("i", hparams["max_source_positions"]))
 fout.write(struct.pack("i", hparams["d_model"]))
-fout.write(struct.pack("i", hparams["decoder_attention_heads"]))
-fout.write(struct.pack("i", hparams["decoder_layers"]))
-fout.write(struct.pack("i", hparams["max_length"]))
-fout.write(struct.pack("i", hparams["d_model"]))
 fout.write(struct.pack("i", hparams["encoder_attention_heads"]))
 fout.write(struct.pack("i", hparams["encoder_layers"]))
+fout.write(struct.pack("i", hparams["max_length"]))
+fout.write(struct.pack("i", hparams["d_model"]))
+fout.write(struct.pack("i", hparams["decoder_attention_heads"]))
+fout.write(struct.pack("i", hparams["decoder_layers"]))
 fout.write(struct.pack("i", hparams["num_mel_bins"]))
 fout.write(struct.pack("i", use_f16))
@@ -139,7 +140,7 @@ for name in list_vars.keys():
         if nn[1] == "layers":
             nn[1] = "blocks"
-            if ".".join(nn[3:-1]) == "self_attn.k_proj":
+            if ".".join(nn[3:-1]) == "encoder_attn.k_proj":
                 mapped = "attn.key" if nn[0] == "encoder" else "cross_attn.key"
             else:
                 mapped = conv_map[".".join(nn[3:-1])]
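To see why the corrected condition matters: "encoder_attn.k_proj" (the cross-attention key projection, present only in decoder layers) is the one sub-name handled outside conv_map, so the branch has to test for that name rather than "self_attn.k_proj", which is now covered by the dictionary. A short illustrative sketch of the special-case path, using a made-up decoder tensor name and the conv_map defined earlier in the script:

# Illustrative only; conv_map refers to the dictionary defined above.
name = "model.decoder.layers.0.encoder_attn.k_proj.weight"

nn = name.split(".")[1:]   # drop "model."
nn[1] = "blocks"

if ".".join(nn[3:-1]) == "encoder_attn.k_proj":
    mapped = "attn.key" if nn[0] == "encoder" else "cross_attn.key"
else:
    mapped = conv_map[".".join(nn[3:-1])]

print(".".join([nn[0], nn[1], nn[2], mapped, nn[-1]]))
# -> decoder.blocks.0.cross_attn.key.weight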