Skip to content

Commit

Permalink
Modify Target (#368)
Browse files Browse the repository at this point in the history
* Target

* Modify Target

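Rename the linear layers in LmTarget, SpTarget, and MlmTarget (e.g. `mlm_linear_1` → `linear_1`; checkpoint keys change from `target.mlm_linear_1.*` to `target.mlm.linear_1.*`, and from `target.output_layer.*` to `target.lm.output_layer.*`).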
  • Loading branch information
Eric8932 authored Jul 17, 2023
1 parent b80a5e2 commit 4ea8f87
Show file tree
Hide file tree
Showing 26 changed files with 118 additions and 115 deletions.
6 changes: 3 additions & 3 deletions finetune/run_classifier_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ def __init__(self, args):
self.encoder = str2encoder[args.encoder](args)
self.target = MlmTarget(args, len(args.tokenizer.vocab))
if args.tie_weights:
self.target.mlm_linear_2.weight = self.embedding.word.embedding.weight
self.target.linear_2.weight = self.embedding.word.embedding.weight
self.answer_position = args.answer_position
self.device = args.device

def forward(self, src, tgt, seg):
emb = self.embedding(src, seg)
memory_bank = self.encoder(emb, seg)
output_mlm = self.target.act(self.target.mlm_linear_1(memory_bank))
output_mlm = self.target.act(self.target.linear_1(memory_bank))
output_mlm = self.target.layer_norm(output_mlm)
tgt_mlm = tgt.contiguous().view(-1)
if self.target.factorized_embedding_parameterization:
Expand All @@ -44,7 +44,7 @@ def forward(self, src, tgt, seg):
output_mlm = output_mlm[tgt_mlm > 0, :]
tgt_mlm = tgt_mlm[tgt_mlm > 0]
self.answer_position = self.answer_position.to(self.device).view(-1)
logits = self.target.mlm_linear_2(output_mlm)
logits = self.target.linear_2(output_mlm)
logits = logits * self.answer_position
prob = self.target.softmax(logits)
loss = self.target.criterion(prob, tgt_mlm)
Expand Down
4 changes: 2 additions & 2 deletions scripts/cloze_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ def __init__(self, args):
def forward(self, src, seg):
emb = self.embedding(src, seg)
output = self.encoder(emb, seg)
output = self.act(self.target.mlm_linear_1(output))
output = self.act(self.target.linear_1(output))
output = self.target.layer_norm(output)
output = self.target.mlm_linear_2(output)
output = self.target.linear_2(output)
prob = torch.nn.Softmax(dim=-1)(output)
return prob

Expand Down
16 changes: 8 additions & 8 deletions scripts/convert_albert_from_huggingface_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,14 @@
output_model["encoder.transformer.feed_forward.linear_2.bias"] = \
input_model["albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias"]

output_model["target.sp_linear_1.weight"] = input_model["albert.pooler.weight"]
output_model["target.sp_linear_1.bias"] = input_model["albert.pooler.bias"]
output_model["target.sp_linear_2.weight"] = input_model["sop_classifier.classifier.weight"]
output_model["target.sp_linear_2.bias"] = input_model["sop_classifier.classifier.bias"]
output_model["target.mlm_linear_1.weight"] = input_model["predictions.dense.weight"]
output_model["target.mlm_linear_1.bias"] = input_model["predictions.dense.bias"]
output_model["target.mlm_linear_2.weight"] = input_model["predictions.decoder.weight"]
output_model["target.mlm_linear_2.bias"] = input_model["predictions.bias"]
output_model["target.sp.linear_1.weight"] = input_model["albert.pooler.weight"]
output_model["target.sp.linear_1.bias"] = input_model["albert.pooler.bias"]
output_model["target.sp.linear_2.weight"] = input_model["sop_classifier.classifier.weight"]
output_model["target.sp.linear_2.bias"] = input_model["sop_classifier.classifier.bias"]
output_model["target.mlm.linear_1.weight"] = input_model["predictions.dense.weight"]
output_model["target.mlm.linear_1.bias"] = input_model["predictions.dense.bias"]
output_model["target.mlm.linear_2.weight"] = input_model["predictions.decoder.weight"]
output_model["target.mlm.linear_2.bias"] = input_model["predictions.bias"]
output_model["target.layer_norm.gamma"] = input_model["predictions.LayerNorm.weight"]
output_model["target.layer_norm.beta"] = input_model["predictions.LayerNorm.bias"]

Expand Down
16 changes: 8 additions & 8 deletions scripts/convert_albert_from_original_tf_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,16 @@ def main():
output_model["encoder.transformer.layer_norm_2.beta"] = \
input_model["bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta"]

output_model["target.sp_linear_1.weight"] = input_model["bert/pooler/dense/kernel"]
output_model["target.sp_linear_1.bias"] = input_model["bert/pooler/dense/bias"]
output_model["target.sp_linear_2.weight"] = input_model["cls/seq_relationship/output_weights"]
output_model["target.sp_linear_2.bias"] = input_model["cls/seq_relationship/output_bias"]
output_model["target.mlm_linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"]
output_model["target.mlm_linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"]
output_model["target.sp.linear_1.weight"] = input_model["bert/pooler/dense/kernel"]
output_model["target.sp.linear_1.bias"] = input_model["bert/pooler/dense/bias"]
output_model["target.sp.linear_2.weight"] = input_model["cls/seq_relationship/output_weights"]
output_model["target.sp.linear_2.bias"] = input_model["cls/seq_relationship/output_bias"]
output_model["target.mlm.linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"]
output_model["target.mlm.linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"]
output_model["target.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"]
output_model["target.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"]
output_model["target.mlm_linear_2.weight"] = input_model["bert/embeddings/word_embeddings"]
output_model["target.mlm_linear_2.bias"] = input_model["cls/predictions/output_bias"]
output_model["target.mlm.linear_2.weight"] = input_model["bert/embeddings/word_embeddings"]
output_model["target.mlm.linear_2.bias"] = input_model["cls/predictions/output_bias"]

torch.save(output_model, args.output_model_path)

Expand Down
18 changes: 9 additions & 9 deletions scripts/convert_albert_from_uer_to_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,16 @@
output_model["albert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output.bias"] = \
input_model["encoder.transformer.feed_forward.linear_2.bias"]

output_model["albert.pooler.weight"] = input_model["target.sp_linear_1.weight"]
output_model["albert.pooler.bias"] = input_model["target.sp_linear_1.bias"]
output_model["sop_classifier.classifier.weight"] = input_model["target.sp_linear_2.weight"]
output_model["sop_classifier.classifier.bias"] = input_model["target.sp_linear_2.bias"]
output_model["predictions.dense.weight"] = input_model["target.mlm_linear_1.weight"]
output_model["predictions.dense.bias"] = input_model["target.mlm_linear_1.bias"]
output_model["albert.pooler.weight"] = input_model["target.sp.linear_1.weight"]
output_model["albert.pooler.bias"] = input_model["target.sp.linear_1.bias"]
output_model["sop_classifier.classifier.weight"] = input_model["target.sp.linear_2.weight"]
output_model["sop_classifier.classifier.bias"] = input_model["target.sp.linear_2.bias"]
output_model["predictions.dense.weight"] = input_model["target.mlm.linear_1.weight"]
output_model["predictions.dense.bias"] = input_model["target.mlm.linear_1.bias"]
output_model["predictions.LayerNorm.weight"] = input_model["target.layer_norm.gamma"]
output_model["predictions.LayerNorm.bias"] = input_model["target.layer_norm.beta"]
output_model["predictions.decoder.weight"] = input_model["target.mlm_linear_2.weight"]
output_model["predictions.decoder.bias"] = input_model["target.mlm_linear_2.bias"]
output_model["predictions.bias"] = input_model["target.mlm_linear_2.bias"]
output_model["predictions.decoder.weight"] = input_model["target.mlm.linear_2.weight"]
output_model["predictions.decoder.bias"] = input_model["target.mlm.linear_2.bias"]
output_model["predictions.bias"] = input_model["target.mlm.linear_2.bias"]

torch.save(output_model, args.output_model_path)
16 changes: 8 additions & 8 deletions scripts/convert_albert_from_uer_to_original_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,16 @@ def main():
output_model["bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta"] = \
input_model["encoder.transformer.layer_norm_2.beta"]

output_model["bert/pooler/dense/kernel"] = input_model["target.sp_linear_1.weight"]
output_model["bert/pooler/dense/bias"] = input_model["target.sp_linear_1.bias"]
output_model["cls/seq_relationship/output_weights"] = input_model["target.sp_linear_2.weight"]
output_model["cls/seq_relationship/output_bias"] = input_model["target.sp_linear_2.bias"]
output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm_linear_1.weight"]
output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm_linear_1.bias"]
output_model["bert/pooler/dense/kernel"] = input_model["target.sp.linear_1.weight"]
output_model["bert/pooler/dense/bias"] = input_model["target.sp.linear_1.bias"]
output_model["cls/seq_relationship/output_weights"] = input_model["target.sp.linear_2.weight"]
output_model["cls/seq_relationship/output_bias"] = input_model["target.sp.linear_2.bias"]
output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm.linear_1.weight"]
output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm.linear_1.bias"]
output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.layer_norm.gamma"]
output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.layer_norm.beta"]
output_model["bert/embeddings/word_embeddings"] = input_model["target.mlm_linear_2.weight"]
output_model["cls/predictions/output_bias"] = input_model["target.mlm_linear_2.bias"]
output_model["bert/embeddings/word_embeddings"] = input_model["target.mlm.linear_2.weight"]
output_model["cls/predictions/output_bias"] = input_model["target.mlm.linear_2.bias"]

tf_vars = []

Expand Down
16 changes: 8 additions & 8 deletions scripts/convert_bert_from_huggingface_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,16 @@ def main():
convert_bert_transformer_encoder_from_huggingface_to_uer(input_model, output_model, args.layers_num)

if args.type == "bert":
output_model["target.sp_linear_1.weight"] = input_model["bert.pooler.dense.weight"]
output_model["target.sp_linear_1.bias"] = input_model["bert.pooler.dense.bias"]
output_model["target.sp_linear_2.weight"] = input_model["cls.seq_relationship.weight"]
output_model["target.sp_linear_2.bias"] = input_model["cls.seq_relationship.bias"]
output_model["target.mlm_linear_1.weight"] = input_model["cls.predictions.transform.dense.weight"]
output_model["target.mlm_linear_1.bias"] = input_model["cls.predictions.transform.dense.bias"]
output_model["target.sp.linear_1.weight"] = input_model["bert.pooler.dense.weight"]
output_model["target.sp.linear_1.bias"] = input_model["bert.pooler.dense.bias"]
output_model["target.sp.linear_2.weight"] = input_model["cls.seq_relationship.weight"]
output_model["target.sp.linear_2.bias"] = input_model["cls.seq_relationship.bias"]
output_model["target.mlm.linear_1.weight"] = input_model["cls.predictions.transform.dense.weight"]
output_model["target.mlm.linear_1.bias"] = input_model["cls.predictions.transform.dense.bias"]
output_model["target.layer_norm.gamma"] = input_model["cls.predictions.transform.LayerNorm.weight"]
output_model["target.layer_norm.beta"] = input_model["cls.predictions.transform.LayerNorm.bias"]
output_model["target.mlm_linear_2.weight"] = input_model["cls.predictions.decoder.weight"]
output_model["target.mlm_linear_2.bias"] = input_model["cls.predictions.bias"]
output_model["target.mlm.linear_2.weight"] = input_model["cls.predictions.decoder.weight"]
output_model["target.mlm.linear_2.bias"] = input_model["cls.predictions.bias"]

torch.save(output_model, args.output_model_path)

Expand Down
16 changes: 8 additions & 8 deletions scripts/convert_bert_from_original_tf_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,16 @@ def main():
input_model["bert/encoder/layer_" + str(i) + "/output/LayerNorm/beta"]

if args.type == "bert":
output_model["target.sp_linear_1.weight"] = input_model["bert/pooler/dense/kernel"]
output_model["target.sp_linear_1.bias"] = input_model["bert/pooler/dense/bias"]
output_model["target.sp_linear_2.weight"] = input_model["cls/seq_relationship/output_weights"]
output_model["target.sp_linear_2.bias"] = input_model["cls/seq_relationship/output_bias"]
output_model["target.mlm_linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"]
output_model["target.mlm_linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"]
output_model["target.sp.linear_1.weight"] = input_model["bert/pooler/dense/kernel"]
output_model["target.sp.linear_1.bias"] = input_model["bert/pooler/dense/bias"]
output_model["target.sp.linear_2.weight"] = input_model["cls/seq_relationship/output_weights"]
output_model["target.sp.linear_2.bias"] = input_model["cls/seq_relationship/output_bias"]
output_model["target.mlm.linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"]
output_model["target.mlm.linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"]
output_model["target.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"]
output_model["target.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"]
output_model["target.mlm_linear_2.weight"] = input_model["bert/embeddings/word_embeddings"]
output_model["target.mlm_linear_2.bias"] = input_model["cls/predictions/output_bias"]
output_model["target.mlm.linear_2.weight"] = input_model["bert/embeddings/word_embeddings"]
output_model["target.mlm.linear_2.bias"] = input_model["cls/predictions/output_bias"]

torch.save(output_model, args.output_model_path)

Expand Down
16 changes: 8 additions & 8 deletions scripts/convert_bert_from_uer_to_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@ def main():
convert_bert_transformer_encoder_from_uer_to_huggingface(input_model, output_model, args.layers_num)

if args.type == "bert":
output_model["bert.pooler.dense.weight"] = input_model["target.sp_linear_1.weight"]
output_model["bert.pooler.dense.bias"] = input_model["target.sp_linear_1.bias"]
output_model["cls.seq_relationship.weight"] = input_model["target.sp_linear_2.weight"]
output_model["cls.seq_relationship.bias"] = input_model["target.sp_linear_2.bias"]
output_model["cls.predictions.transform.dense.weight"] = input_model["target.mlm_linear_1.weight"]
output_model["cls.predictions.transform.dense.bias"] = input_model["target.mlm_linear_1.bias"]
output_model["bert.pooler.dense.weight"] = input_model["target.sp.linear_1.weight"]
output_model["bert.pooler.dense.bias"] = input_model["target.sp.linear_1.bias"]
output_model["cls.seq_relationship.weight"] = input_model["target.sp.linear_2.weight"]
output_model["cls.seq_relationship.bias"] = input_model["target.sp.linear_2.bias"]
output_model["cls.predictions.transform.dense.weight"] = input_model["target.mlm.linear_1.weight"]
output_model["cls.predictions.transform.dense.bias"] = input_model["target.mlm.linear_1.bias"]
output_model["cls.predictions.transform.LayerNorm.weight"] = input_model["target.layer_norm.gamma"]
output_model["cls.predictions.transform.LayerNorm.bias"] = input_model["target.layer_norm.beta"]
output_model["cls.predictions.decoder.weight"] = input_model["target.mlm_linear_2.weight"]
output_model["cls.predictions.bias"] = input_model["target.mlm_linear_2.bias"]
output_model["cls.predictions.decoder.weight"] = input_model["target.mlm.linear_2.weight"]
output_model["cls.predictions.bias"] = input_model["target.mlm.linear_2.bias"]

torch.save(output_model, args.output_model_path)

Expand Down
14 changes: 7 additions & 7 deletions scripts/convert_bert_from_uer_to_original_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ def main():
input_model["encoder.transformer." + str(i) + ".layer_norm_2.beta"]

if args.type == "bert":
output_model["bert/pooler/dense/kernel"] = input_model["target.sp_linear_1.weight"]
output_model["bert/pooler/dense/bias"] = input_model["target.sp_linear_1.bias"]
output_model["cls/seq_relationship/output_weights"] = input_model["target.sp_linear_2.weight"]
output_model["cls/seq_relationship/output_bias"] = input_model["target.sp_linear_2.bias"]
output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm_linear_1.weight"]
output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm_linear_1.bias"]
output_model["bert/pooler/dense/kernel"] = input_model["target.sp.linear_1.weight"]
output_model["bert/pooler/dense/bias"] = input_model["target.sp.linear_1.bias"]
output_model["cls/seq_relationship/output_weights"] = input_model["target.sp.linear_2.weight"]
output_model["cls/seq_relationship/output_bias"] = input_model["target.sp.linear_2.bias"]
output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm.linear_1.weight"]
output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm.linear_1.bias"]
output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.layer_norm.gamma"]
output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.layer_norm.beta"]
output_model["cls/predictions/output_bias"] = input_model["target.mlm_linear_2.bias"]
output_model["cls/predictions/output_bias"] = input_model["target.mlm.linear_2.bias"]

tf_vars = []

Expand Down
2 changes: 1 addition & 1 deletion scripts/convert_gpt2_from_huggingface_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,6 @@

output_model["encoder.layer_norm.gamma"] = input_model["transformer.ln_f.weight"]
output_model["encoder.layer_norm.beta"] = input_model["transformer.ln_f.bias"]
output_model["target.output_layer.weight"] = input_model["lm_head.weight"]
output_model["target.lm.output_layer.weight"] = input_model["lm_head.weight"]

torch.save(output_model, args.output_model_path)
4 changes: 2 additions & 2 deletions scripts/convert_pegasus_from_uer_to_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
output_model["model.decoder.embed_positions.weight"] = input_model["tgt_embedding.sinusoidalpos.pe"].squeeze(1)
output_model["model.encoder.embed_tokens.weight"] = input_model["embedding.word.embedding.weight"]
output_model["model.decoder.embed_tokens.weight"] = input_model["tgt_embedding.word.embedding.weight"]
output_model["lm_head.weight"] = input_model["target.output_layer.weight"]
output_model["final_logits_bias"] = input_model["target.output_layer.bias"].unsqueeze(0)
output_model["lm_head.weight"] = input_model["target.lm.output_layer.weight"]
output_model["final_logits_bias"] = input_model["target.lm.output_layer.bias"].unsqueeze(0)

convert_encoder_decoder_transformer_from_uer_to_huggingface(input_model, output_model, args.layers_num)

Expand Down
2 changes: 1 addition & 1 deletion scripts/convert_t5_from_huggingface_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
input_model["encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"]
output_model["decoder.self_pos_emb.relative_attention_bias.weight"] = \
input_model["decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"]
output_model["target.output_layer.weight"] = \
output_model["target.lm.output_layer.weight"] = \
input_model["lm_head.weight"]

for i in range(args.layers_num):
Expand Down
2 changes: 1 addition & 1 deletion scripts/convert_t5_from_uer_to_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
output_model["decoder.embed_tokens.weight"] = \
input_model["tgt_embedding.word.embedding.weight"]
output_model["lm_head.weight"] = \
input_model["target.output_layer.weight"]
input_model["target.lm.output_layer.weight"]

for i in range(args.layers_num):
output_model["encoder.block." + str(i) + ".layer.0.SelfAttention.q.weight"] = \
Expand Down
8 changes: 4 additions & 4 deletions scripts/convert_xlmroberta_from_huggingface_to_uer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,17 @@
output_model["encoder.transformer." + str(i) + ".layer_norm_2.beta"] = \
input_model["roberta.encoder.layer." + str(i) + ".output.LayerNorm.bias"]

output_model["target.mlm_linear_1.weight"] = \
output_model["target.mlm.linear_1.weight"] = \
input_model["lm_head.dense.weight"]
output_model["target.mlm_linear_1.bias"] = \
output_model["target.mlm.linear_1.bias"] = \
input_model["lm_head.dense.bias"]
output_model["target.layer_norm.gamma"] = \
input_model["lm_head.layer_norm.weight"]
output_model["target.layer_norm.beta"] = \
input_model["lm_head.layer_norm.bias"]
output_model["target.mlm_linear_2.weight"] = \
output_model["target.mlm.linear_2.weight"] = \
input_model["lm_head.decoder.weight"]
output_model["target.mlm_linear_2.bias"] = \
output_model["target.mlm.linear_2.bias"] = \
input_model["lm_head.bias"]

torch.save(output_model, args.output_model_path)
Loading

0 comments on commit 4ea8f87

Please sign in to comment.