# evaluate.py
import json
import os

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder

from models.build import build_model
from utils.data_gen import VQADataGenerator
from utils.vqaEvaluation.vqaEval import VQAEval
from utils.vqaTools.vqa import VQA


def generate_result(data_gen, question_ids, model, label2ans, output_file, verbose=True):
    """
    Generate the predicted answer for each question and store the predictions
    as a JSON file in the format expected by the VQA v2 evaluation code.
    """
    def generator_predict(model, data_gen):
        # Yield one batch of class probabilities at a time.
        for data, label in data_gen:
            yield model.predict_on_batch(data)

    print("\nGenerating results...")
    json_list = []
    # Map predicted class indices back to their answer strings.
    decoder = np.vectorize(lambda x: label2ans[int(x)])
    progbar = tf.keras.utils.Progbar(len(question_ids), verbose=1)
    predict_gen = generator_predict(model, data_gen)
    for i in range(len(data_gen)):
        y_predict = next(predict_gen)
        y_predict = np.argmax(y_predict, axis=-1)
        y_predict_text = decoder(y_predict)
        for j in range(len(y_predict_text)):
            prediction = y_predict_text[j]
            question_id = question_ids[i * data_gen.batch_size + j]
            json_list.append({'answer': prediction, 'question_id': int(question_id)})
            progbar.add(1)
    with open(output_file, 'w') as f:
        json.dump(json_list, f)
    if verbose:
        print(f"Results generated and saved as {output_file}.")
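

# Shape of the JSON file written by generate_result: a list of
# answer/question_id records, matching the VQA v2 results format that
# VQA.loadRes consumes below. The ids and answers here are illustrative only:
# [
#     {"answer": "yes", "question_id": 262148000},
#     {"answer": "2", "question_id": 262148001},
#     ...
# ]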


def vqaEval(C):
    """
    Calculate the VQA score and generate accuracy reports.
    Adapted from the official VQA v2.0 evaluation code.
    """
    annFile = os.path.join(C.VAL_DIR, 'v2_mscoco_val2014_annotations.json')
    quesFile = os.path.join(C.VAL_DIR, 'v2_OpenEnded_mscoco_val2014_questions.json')
    resFile = C.JSON_RESULT
    fileTypes = ['accuracy', 'evalQA', 'evalQuesType', 'evalAnsType']
    [accuracyFile, evalQAFile, evalQuesTypeFile, evalAnsTypeFile] = [
        f'{C.OUTPUT_DIR}/{C.MODEL_NAME}_{fileType}.json' for fileType in fileTypes]

    # Create the vqa object from the annotations and the vqaRes object from the predictions.
    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(resFile, quesFile)

    # n is the precision of the accuracy (number of places after the decimal point); default is 2.
    vqaEvaluator = VQAEval(vqa, vqaRes, n=2)

    # Evaluate the results. To evaluate only a subset of questions, pass their
    # question ids as a list to evaluate(); by default all question ids in the
    # annotation file are used.
    vqaEvaluator.evaluate()

    if C.VERBOSE:
        # Print accuracies.
        print("\n")
        print("Overall Accuracy is: %.02f\n" % (vqaEvaluator.accuracy['overall']))
        print("Per Question Type Accuracy is the following:")
        for quesType in vqaEvaluator.accuracy['perQuestionType']:
            print("%s : %.02f" % (quesType, vqaEvaluator.accuracy['perQuestionType'][quesType]))
        print("\n")
        print("Per Answer Type Accuracy is the following:")
        for ansType in vqaEvaluator.accuracy['perAnswerType']:
            print("%s : %.02f" % (ansType, vqaEvaluator.accuracy['perAnswerType'][ansType]))
        print("\n")

    # Plot accuracy for the various question types.
    plt.bar(range(len(vqaEvaluator.accuracy['perQuestionType'])), vqaEvaluator.accuracy['perQuestionType'].values(),
            align='center')
    plt.xticks(range(len(vqaEvaluator.accuracy['perQuestionType'])), vqaEvaluator.accuracy['perQuestionType'].keys(),
               rotation=0, fontsize=10)
    plt.title('Per Question Type Accuracy', fontsize=10)
    plt.xlabel('Question Types', fontsize=10)
    plt.ylabel('Accuracy', fontsize=10)
    plt.savefig(os.path.join(C.OUTPUT_DIR, f"{C.MODEL_NAME}_{C.RUN_MODE}_qtype_acc.png"))
    if C.VERBOSE:
        print(f"Per question type accuracy graph saved at {C.OUTPUT_DIR}.")

    # Save evaluation results.
    with open(accuracyFile, 'w') as f:
        json.dump(vqaEvaluator.accuracy, f)
    with open(evalQAFile, 'w') as f:
        json.dump(vqaEvaluator.evalQA, f)
    with open(evalQuesTypeFile, 'w') as f:
        json.dump(vqaEvaluator.evalQuesType, f)
    with open(evalAnsTypeFile, 'w') as f:
        json.dump(vqaEvaluator.evalAnsType, f)
    if C.VERBOSE:
        print(f"\nEvaluation results saved at {C.OUTPUT_DIR}.")
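

# Shape of the accuracy report saved to <MODEL_NAME>_accuracy.json above. The
# keys follow what vqaEval() reads from VQAEval.accuracy; the type names and
# numbers are illustrative only:
# {
#     "overall": 54.32,
#     "perQuestionType": {"how many": 38.10, "is the": 71.25, ...},
#     "perAnswerType": {"yes/no": 70.94, "number": 34.51, "other": 41.73}
# }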


def evaluate(C, model=None):
    """Evaluate the model, either at the end of training or as a standalone run."""
    print("\nStarting evaluation process...")
    eval_gen = VQADataGenerator(C, mode=C.RUN_MODE)

    # If only evaluating (no trained model passed in), build/load the model first.
    if model is None:
        model = build_model(C)

    # Array mapping each class index to its corresponding answer string.
    label2ans = joblib.load(C.LABEL_ENCODER_PATH).classes_

    if C.RUN_MODE == 'eval':
        split = 'val'
    elif C.RUN_MODE == 'test':
        split = 'test'
    else:
        raise ValueError(f"Unsupported RUN_MODE for evaluation: {C.RUN_MODE}")
    with open(C.QUESTION_PATH[split], 'r') as f:
        question_ids = pd.DataFrame(json.load(f)['questions'])['question_id']

    generate_result(eval_gen, question_ids, model, label2ans, C.JSON_RESULT, C.VERBOSE)

    if C.RUN_MODE == 'eval':
        if C.VERBOSE:
            print("\nCalculating score for the VQA v2 validation dataset...")
        vqaEval(C)
    else:
        if C.VERBOSE:
            print("\nTo calculate the score for the test set, check out https://visualqa.org/challenge.html")
    print("Evaluation done.")