
Commit 06ccee0

add eval_retrieval
1 parent 82e2a3c commit 06ccee0

1 file changed: +275 -0 lines changed

eval_retrieval.py

Lines changed: 275 additions & 0 deletions
@@ -0,0 +1,275 @@
import argparse
import json
import logging
import os
import random
from io import open
import numpy as np

from tensorboardX import SummaryWriter
from tqdm import tqdm
from bisect import bisect
import yaml
from easydict import EasyDict as edict
import sys

import torch
import torch.nn.functional as F
import torch.nn as nn

from vilbert.task_utils import LoadDatasetEval, LoadLosses, ForwardModelsTrain, ForwardModelsVal, EvaluatingModel
from vilbert.vilbert import VILBertForVLTasks, BertForMultiModalPreTraining
from vilbert.basebert import BaseBertForVLTasks

import vilbert.utils as utils
import torch.distributed as dist

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )
    parser.add_argument(
        "--from_pretrained",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.",
    )
    parser.add_argument(
        "--output_dir",
        default="results",
        type=str,
        help="The output directory where the model checkpoints will be written.",
    )
    parser.add_argument(
        "--config_file",
        default="config/bert_config.json",
        type=str,
        help="The config file which specifies the model details.",
    )
    parser.add_argument(
        "--no_cuda", action="store_true", help="Whether not to use CUDA when available"
    )
    parser.add_argument(
        "--do_lower_case",
        default=True,
        type=bool,
        help="Whether to lower case the input text. True for uncased models, False for cased models.",
    )
    parser.add_argument(
        "--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus"
    )
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit float precision instead of 32-bit",
    )
    parser.add_argument(
        "--loss_scale",
        type=float,
        default=0,
        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n",
    )
    parser.add_argument(
        "--num_workers", type=int, default=16, help="Number of workers in the dataloader."
    )
    parser.add_argument(
        "--save_name",
        default='',
        type=str,
        help="save name for training.",
    )
    parser.add_argument(
        "--tasks", default='', type=str, help="1-2-3... task ids, separated by -"
    )
    parser.add_argument(
        "--in_memory", default=False, type=bool, help="whether to use chunks for parallel training."
    )
    parser.add_argument(
        "--baseline", action="store_true", help="whether to use the single-stream baseline."
    )
    parser.add_argument(
        "--zero_shot", action="store_true", help="whether to run zero-shot evaluation."
    )
    parser.add_argument(
        "--split", default="", type=str, help="which split to use."
    )
    parser.add_argument(
        "--batch_size", default=1, type=int, help="batch size for evaluation."
    )
    args = parser.parse_args()
    with open('vlbert_tasks.yml', 'r') as f:
        task_cfg = edict(yaml.safe_load(f))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.baseline:
        from pytorch_pretrained_bert.modeling import BertConfig
    else:
        from vilbert.vilbert import BertConfig

    task_names = []
    for i, task_id in enumerate(args.tasks.split('-')):
        task = 'TASK' + task_id
        name = task_cfg[task]['name']
        task_names.append(name)

    # timeStamp = '-'.join(task_names) + '_' + args.config_file.split('/')[1].split('.')[0]
    if '/' in args.from_pretrained:
        timeStamp = args.from_pretrained.split('/')[1]
    else:
        timeStamp = args.from_pretrained

    savePath = os.path.join(args.output_dir, timeStamp)

    config = BertConfig.from_json_file(args.config_file)
    bert_weight_name = json.load(open("config/" + args.bert_model + "_weight_name.json", "r"))

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend="nccl")

    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
            device, n_gpu, bool(args.local_rank != -1), args.fp16
        )
    )

    default_gpu = False
    if dist.is_available() and args.local_rank != -1:
        rank = dist.get_rank()
        if rank == 0:
            default_gpu = True
    else:
        default_gpu = True

    if default_gpu and not os.path.exists(savePath):
        os.makedirs(savePath)

    task_batch_size, task_num_iters, task_ids, task_datasets_val, task_dataloader_val \
        = LoadDatasetEval(args, task_cfg, args.tasks.split('-'))

    num_labels = max([dataset.num_labels for dataset in task_datasets_val.values()])

    config.fast_mode = True
    if args.zero_shot:
        model = BertForMultiModalPreTraining.from_pretrained(args.from_pretrained, config)
    else:
        model = VILBertForVLTasks.from_pretrained(
            args.from_pretrained, config, num_labels=num_labels, default_gpu=default_gpu
        )

    task_losses = LoadLosses(args, task_cfg, args.tasks.split('-'))
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )
        model = DDP(model, delay_allreduce=True)

    elif n_gpu > 1:
        model = nn.DataParallel(model)

    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]

    print(" Num Iters: ", task_num_iters)
    print(" Batch size: ", task_batch_size)

    model.eval()
    # When running evaluation, each task is evaluated sequentially.
    for task_id in task_ids:
        results = []
        others = []

        # Caption-to-image scores: 5000 captions x 1000 candidate images,
        # filled 500 images at a time (image_idx selects the chunk of 500).
        score_matrix = np.zeros((5000, 1000))
        target_matrix = np.zeros((5000, 1000))
        rank_matrix = np.ones((5000)) * 1000
        count = 0

        for i, batch in enumerate(task_dataloader_val[task_id]):
            batch = tuple(t.cuda(device=device, non_blocking=True) for t in batch)
            features, spatials, image_mask, question, input_mask, segment_ids, target, caption_idx, image_idx = batch

            if task_id in ['TASK3']:
                batch_size = features.size(0)
                features = features.squeeze(0)
                spatials = spatials.squeeze(0)
                image_mask = image_mask.squeeze(0)

            with torch.no_grad():
                if args.zero_shot:
                    _, _, vil_logit, _ = model(question, features, spatials, segment_ids, input_mask, image_mask)

                    score_matrix[caption_idx, image_idx*500:(image_idx+1)*500] = torch.softmax(vil_logit, dim=1)[:, 0].view(-1).cpu().numpy()
                    target_matrix[caption_idx, image_idx*500:(image_idx+1)*500] = target.view(-1).float().cpu().numpy()

                else:
                    _, vil_logit, _, _, _, _, _ = model(question, features, spatials, segment_ids, input_mask, image_mask)
                    score_matrix[caption_idx, image_idx*500:(image_idx+1)*500] = vil_logit.view(-1).cpu().numpy()
                    target_matrix[caption_idx, image_idx*500:(image_idx+1)*500] = target.view(-1).float().cpu().numpy()

            # Once both 500-image chunks for this caption are scored, compute its rank.
            if image_idx.item() == 1:
                rank = np.where((np.argsort(-score_matrix[caption_idx]) == np.where(target_matrix[caption_idx]==1)[0][0]) == 1)[0][0]
                rank_matrix[caption_idx] = rank

                rank_matrix_tmp = rank_matrix[:caption_idx+1]
                r1 = 100.0 * np.sum(rank_matrix_tmp < 1) / len(rank_matrix_tmp)
                r5 = 100.0 * np.sum(rank_matrix_tmp < 5) / len(rank_matrix_tmp)
                r10 = 100.0 * np.sum(rank_matrix_tmp < 10) / len(rank_matrix_tmp)

                medr = np.floor(np.median(rank_matrix_tmp) + 1)
                meanr = np.mean(rank_matrix_tmp) + 1
                print("%d Final r1:%.3f, r5:%.3f, r10:%.3f, medr:%.3f, meanr:%.3f" % (count, r1, r5, r10, medr, meanr))

                results.append(np.argsort(-score_matrix[caption_idx]).tolist()[:20])
                count += 1

        r1 = 100.0 * np.sum(rank_matrix < 1) / len(rank_matrix)
        r5 = 100.0 * np.sum(rank_matrix < 5) / len(rank_matrix)
        r10 = 100.0 * np.sum(rank_matrix < 10) / len(rank_matrix)

        medr = np.floor(np.median(rank_matrix) + 1)
        meanr = np.mean(rank_matrix) + 1

        print("************************************************")
        print("Final r1:%.3f, r5:%.3f, r10:%.3f, medr:%.3f, meanr:%.3f" % (r1, r5, r10, medr, meanr))
        print("************************************************")

        if args.split:
            json_path = os.path.join(savePath, args.split)
        else:
            json_path = os.path.join(savePath, task_cfg[task_id]['val_split'])
        json.dump(results, open(json_path + '_result.json', 'w'))
        json.dump(others, open(json_path + '_others.json', 'w'))

if __name__ == "__main__":

    main()
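
Side note on the metrics printed above: a minimal standalone sketch of how recall@K and the median/mean rank follow from the zero-based ranks collected in rank_matrix (the example array below is hypothetical and purely for illustration, not part of the commit):

import numpy as np

# Hypothetical zero-based ranks of the ground-truth image for five captions.
rank_matrix = np.array([0, 3, 7, 12, 1])

r1 = 100.0 * np.sum(rank_matrix < 1) / len(rank_matrix)    # recall@1  -> 20.0
r5 = 100.0 * np.sum(rank_matrix < 5) / len(rank_matrix)    # recall@5  -> 60.0
r10 = 100.0 * np.sum(rank_matrix < 10) / len(rank_matrix)  # recall@10 -> 80.0
medr = np.floor(np.median(rank_matrix) + 1)                 # median rank, 1-based -> 4.0
meanr = np.mean(rank_matrix) + 1                            # mean rank, 1-based -> 5.6
print("r1:%.3f, r5:%.3f, r10:%.3f, medr:%.3f, meanr:%.3f" % (r1, r5, r10, medr, meanr))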

0 commit comments
