Skip to content

Commit 33d7961

Browse files
committed
v100
1 parent 760fba6 commit 33d7961

File tree

4 files changed

+150
-8
lines changed

4 files changed

+150
-8
lines changed
Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
#!/bin/bash
# SLURM training script for WMT'14/'16 En-De (post-commit version, v100):
# switched from 4 to 8 GPUs, from train.py to the installed fairseq-train
# entry point, and adjusted --update-freq / --max-tokens accordingly.

#SBATCH --job-name=wmt14_en_de
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task 1 # Number of CPUs per task
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --mem=30G # CPU memory per node


stage=0
# NOTE: this basename-derived name is immediately overridden below; kept
# only as a record of the original naming scheme.
exp=`basename $0 | sed -e 's/^run_//' -e 's/.sh$//'`
exp=local_joint_attention_wmt_en_de_big
echo $exp

DATA=data-bin/wmt16_en_de_bpe32k
SAVE="checkpoints/$exp"
mkdir -p $SAVE

# Launch fairseq-train under torch.distributed with one process per GPU.
# NOTE: the original command passed --min-lr and --warmup-init-lr twice
# with equal values; the duplicates are removed here (argparse keeps the
# last occurrence, so behavior is unchanged).
python -m torch.distributed.launch --nproc_per_node 8 $(which fairseq-train) \
    $DATA --fp16 --log-interval 100 --no-progress-bar \
    --max-update 30000 --share-all-embeddings \
    --optimizer adam --adam-betas '(0.9, 0.98)' \
    --clip-norm 0.0 --weight-decay 0.0 \
    --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
    --min-lr 1e-09 --update-freq 32 --keep-last-epochs 10 \
    --ddp-backend=no_c10d --max-tokens 1800 \
    --lr-scheduler cosine --warmup-init-lr 1e-07 --warmup-updates 10000 \
    --lr-shrink 1 --max-lr 0.0009 --lr 1e-7 \
    --t-mult 1 --lr-period-updates 20000 \
    --arch local_joint_attention_wmt_en_de_big --save-dir $SAVE \
    --dropout 0.3 --attention-dropout 0.3 \
    --user-dir models

# Checkpoint averaging
python scripts/average_checkpoints.py --inputs $SAVE \
    --num-epoch-checkpoints 10 --output "${SAVE}/checkpoint_last10_avg.pt"

# Evaluation
CUDA_VISIBLE_DEVICES=0 fairseq-generate $DATA --path "${SAVE}/checkpoint_last10_avg.pt" --batch-size 32 --beam 5 \
    --user-dir models --remove-bpe --lenpen 0.35 --gen-subset test > wmt16_gen.txt
bash scripts/compound_split_bleu.sh wmt16_gen.txt
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
#!/bin/bash
# SLURM job: train the local_joint_attention_wmt_en_de_big model on
# WMT'16 En-De (32k BPE) with 8 GPUs, then average the last 10 epoch
# checkpoints and evaluate compound-split BLEU on the test set.

#SBATCH --job-name=wmt14_en_de
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task 1 # Number of CPUs per task
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --mem=30G # CPU memory per node


stage=0
# NOTE: this basename-derived name is immediately overridden below; kept
# only as a record of the original naming scheme.
exp=`basename $0 | sed -e 's/^run_//' -e 's/.sh$//'`
exp=local_joint_attention_wmt_en_de_big
echo $exp

DATA=data-bin/wmt16_en_de_bpe32k
SAVE="checkpoints/$exp"
mkdir -p $SAVE

# Launch fairseq-train under torch.distributed with one process per GPU.
# --user-dir models makes the custom architecture visible to fairseq.
# NOTE: the original command passed --min-lr and --warmup-init-lr twice
# with equal values; the duplicates are removed here (argparse keeps the
# last occurrence, so behavior is unchanged).
python -m torch.distributed.launch --nproc_per_node 8 $(which fairseq-train) \
    $DATA --fp16 --log-interval 100 --no-progress-bar \
    --max-update 30000 --share-all-embeddings \
    --optimizer adam --adam-betas '(0.9, 0.98)' \
    --clip-norm 0.0 --weight-decay 0.0 \
    --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
    --min-lr 1e-09 --update-freq 32 --keep-last-epochs 10 \
    --ddp-backend=no_c10d --max-tokens 1800 \
    --lr-scheduler cosine --warmup-init-lr 1e-07 --warmup-updates 10000 \
    --lr-shrink 1 --max-lr 0.0009 --lr 1e-7 \
    --t-mult 1 --lr-period-updates 20000 \
    --arch local_joint_attention_wmt_en_de_big --save-dir $SAVE \
    --dropout 0.3 --attention-dropout 0.3 \
    --user-dir models

# Checkpoint averaging
python scripts/average_checkpoints.py --inputs $SAVE \
    --num-epoch-checkpoints 10 --output "${SAVE}/checkpoint_last10_avg.pt"

# Evaluation
CUDA_VISIBLE_DEVICES=0 fairseq-generate $DATA --path "${SAVE}/checkpoint_last10_avg.pt" --batch-size 32 --beam 5 \
    --user-dir models --remove-bpe --lenpen 0.35 --gen-subset test > wmt16_gen.txt
bash scripts/compound_split_bleu.sh wmt16_gen.txt

score.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2017-present, Facebook, Inc.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the license found in the LICENSE file in
6+
# the root directory of this source tree. An additional grant of patent rights
7+
# can be found in the PATENTS file in the same directory.
8+
"""
9+
BLEU scoring of generated translations against reference translations.
10+
"""
11+
12+
import argparse
13+
import os
14+
import sys
15+
16+
from fairseq import bleu, tokenizer
17+
from fairseq.data import dictionary
18+
19+
20+
def get_parser():
    """Build and return the argument parser for BLEU scoring.

    Options: -s/--sys (system output path, '-' = stdin), -r/--ref
    (required reference path), -o/--order (max n-gram order, default 4),
    --ignore-case, and --sacrebleu (use sacrebleu instead of fairseq's
    internal scorer).
    """
    p = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    # fmt: off
    p.add_argument('-s', '--sys', default='-', help='system output')
    p.add_argument('-r', '--ref', required=True, help='references')
    p.add_argument('-o', '--order', default=4, metavar='N', type=int,
                   help='consider ngrams up to this order')
    p.add_argument('--ignore-case', action='store_true',
                   help='case-insensitive scoring')
    p.add_argument('--sacrebleu', action='store_true',
                   help='score with sacrebleu')
    # fmt: on
    return p
33+
34+
35+
def main():
    """Score system translations against references with BLEU.

    Parses command-line args (see ``get_parser``); reads the system
    output from the --sys file, or stdin when --sys is '-'.  Scores
    with sacrebleu when --sacrebleu is given, otherwise with fairseq's
    internal tokenizing BLEU scorer.  Prints the result to stdout.
    """
    parser = get_parser()
    args = parser.parse_args()
    print(args)

    assert args.sys == '-' or os.path.exists(args.sys), \
        "System output file {} does not exist".format(args.sys)
    assert os.path.exists(args.ref), \
        "Reference file {} does not exist".format(args.ref)

    # Renamed from `dict` so the builtin type is not shadowed.
    dct = dictionary.Dictionary()

    def readlines(fd):
        # Yield lines, lower-cased when case-insensitive scoring is on.
        for line in fd.readlines():
            if args.ignore_case:
                yield line.lower()
            else:
                yield line

    if args.sacrebleu:
        import sacrebleu

        def score(fdsys):
            with open(args.ref) as fdref:
                print(sacrebleu.corpus_bleu(fdsys, [fdref]))
    else:
        def score(fdsys):
            with open(args.ref) as fdref:
                scorer = bleu.Scorer(dct.pad(), dct.eos(), dct.unk())
                # Pair system and reference lines positionally; both
                # files are expected to have the same number of lines.
                for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
                    sys_tok = tokenizer.Tokenizer.tokenize(sys_tok, dct)
                    ref_tok = tokenizer.Tokenizer.tokenize(ref_tok, dct)
                    scorer.add(ref_tok, sys_tok)
                print(scorer.result_string(args.order))

    if args.sys == '-':
        score(sys.stdin)
    else:
        with open(args.sys, 'r') as f:
            score(f)


if __name__ == '__main__':
    main()

scripts/compound_split_bleu.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
#!/bin/bash
# Compute compound-split BLEU from fairseq-generate output: split
# hyphenated compounds ("x-y" -> "x ##AT##-##AT## y") in both the
# hypotheses and the references before scoring, per the WMT En-De
# evaluation convention.

if [ $# -ne 1 ]; then
    echo "usage: $0 GENERATE_PY_OUTPUT"
    exit 1
fi

# Quote all path expansions so filenames with spaces work.
GEN="$1"

SYS="$GEN.sys"
REF="$GEN.ref"

# fairseq-generate prints a final summary line containing "BLEU" only
# once generation has completed.
if [ "$(tail -n 1 "$GEN" | grep BLEU | wc -l)" -ne 1 ]; then
    echo "not done generating"
    exit
fi

# H-* lines carry the hypotheses (fields 3+), T-* lines the references
# (fields 2+); the perl one-liner inserts the compound-split marker.
grep ^H "$GEN" | cut -f3- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > "$SYS"
grep ^T "$GEN" | cut -f2- | perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' > "$REF"
python score.py --sys "$SYS" --ref "$REF"

0 commit comments

Comments
 (0)