jiasenlu
diff --git a/‎README.md‎
Lines changed: 10 additions & 8 deletions b/‎README.md‎
Lines changed: 10 additions & 8 deletions
diff --git a/‎eval_tasks.py‎
Lines changed: 3 additions & 9 deletions b/‎eval_tasks.py‎
Lines changed: 3 additions & 9 deletions
diff --git a/‎parallel/data_parallel.py‎
Lines changed: 0 additions & 226 deletions b/‎parallel/data_parallel.py‎
Lines changed: 0 additions & 226 deletions
@@ -10,8 +10,8 @@ Code and pre-trained models for **ViLBERT: Pretraining Task-Agnostic Visiolingui
 ```text
 conda create -n vilbert python=3.6
 conda activate vilbert
-git clone https://github.com/jiasenlu/ViLBert
-cd ViLBert
+git clone https://github.com/jiasenlu/vilbert_v0.1
+cd vilbert_v0.1
 pip install -r requirements.txt
 ```
 
@@ -45,13 +45,13 @@ Check `README.md` under `data` for more details.
 
 ## Visiolinguistic Pre-training
 
-To train the model: 
+Once you have extracted all the image features, train the model with:
 
 ```
 
 ```
 
-Distributed Training:
+To train the model in a distributed setting:
 ```
 
 ```
@@ -60,10 +60,12 @@ Distributed Training:
 
 We can directly use the Pre-trained ViLBERT model for zero-shot image retrieval tasks on Flickr30k. 
 
-To evaluate on Flickr30k:
+First, update `features_h5path1` and `val_annotations_jsonpath` in `vlbert_tasks.yml` to load the Flickr30k test-set image features and annotation JSON file (the default is the training features).
 
-```
-python 
+Then, use the following command to evaluate the pre-trained 6-layer ViLBERT model (evaluation currently supports only a single GPU):
+
+```bash
+python eval_retrieval.py --bert_model bert-base-uncased --from_pretrained save/bert_base_6_layer_6_connect/pytorch_model_9.bin --config_file config/bert_base_6layer_6conect.json --task 3 --split test --batch_size 1 --zero_shot
 ```
 
 
@@ -91,7 +93,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 t
 ```
 
 ### Image Retrieval
-```
+```bash
 python -m torch.distributed.launch --nproc_per_node=8 --nnodes=1 --node_rank=0 train_tasks.py --bert_model bert-base-uncased --from_pretrained save/bert_base_6_layer_6_connect_freeze_0/pytorch_model_8.bin  --config_file config/bert_base_6layer_6conect.json  --learning_rate 4e-5 --num_workers 9 --tasks 11 --save_name pretrained
 ```
 
 
@@ -88,7 +88,7 @@ def main():
         "Positive power of 2: static loss scaling value.\n",
     )
     parser.add_argument(
-        "--num_workers", type=int, default=16, help="Number of workers in the dataloader."
+        "--num_workers", type=int, default=10, help="Number of workers in the dataloader."
     )
     parser.add_argument(
         "--save_name",
@@ -97,10 +97,7 @@ def main():
         help="save name for training.", 
     )
     parser.add_argument(
-        "--use_chunk", default=0, type=float, help="whether use chunck for parallel training."
-    )
-    parser.add_argument(
-        "--batch_size", default=1024, type=int, help="what is the batch size?"
+        "--batch_size", default=1000, type=int, help="what is the batch size?"
     )
     parser.add_argument(
         "--tasks", default='', type=str, help="1-2-3... training task separate by -"
@@ -117,7 +114,7 @@ def main():
 
     args = parser.parse_args()
     with open('vlbert_tasks.yml', 'r') as f:
-        task_cfg = edict(yaml.load(f))
+        task_cfg = edict(yaml.safe_load(f))
 
     random.seed(args.seed)
     np.random.seed(args.seed)
@@ -150,7 +147,6 @@ def main():
         torch.cuda.set_device(args.local_rank)
         device = torch.device("cuda", args.local_rank)
         n_gpu = 1
-        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend="nccl")
 
     logger.info(
@@ -202,12 +198,10 @@ def main():
 
     no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
 
-    print("***** Running training *****")
     print("  Num Iters: ", task_num_iters)
     print("  Batch size: ", task_batch_size)    
 
     model.eval()
-    # when run evaluate, we run each task sequentially. 
     for task_id in task_ids:
         results = []
         others = []