-
Notifications
You must be signed in to change notification settings - Fork 26
/
caption_evaluation.py
34 lines (28 loc) · 1.47 KB
/
caption_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from nltk.translate import bleu_score
import nltk.translate.gleu_score as gleu
from nlgeval import NLGEval
def get_evalutation_scores(hypothesis, refrences, testing_mode=False):
    """Compute corpus-level evaluation scores for generated captions.

    GLEU-1 through GLEU-4 are always computed with NLTK's ``corpus_gleu``.
    The remaining metrics depend on ``testing_mode``:

    * ``False`` (default): BLEU-1 through BLEU-4 via NLTK's ``corpus_bleu``.
    * ``True``: whatever ``NLGEval().compute_metrics`` returns for the
      space-joined sentences.

    Args:
        hypothesis: Sequence of hypotheses, each a sequence of string tokens.
        refrences: Sequence parallel to ``hypothesis``; each entry is a
            sequence of references, each itself a sequence of string tokens.
        testing_mode: When true, use NLGEval instead of NLTK BLEU.

    Returns:
        A mapping from metric name to score. It always contains the keys
        ``"Gleu_1"`` .. ``"Gleu_4"``; in non-testing mode it also contains
        ``"Bleu_1"`` .. ``"Bleu_4"``.

    Note:
        The input arguments are never modified. Joined sentences needed by
        NLGEval are built as new lists, so the caller's token lists stay
        intact and the function can safely be called repeatedly with the
        same data.
    """
    # GLEU over n-gram orders 1..n, for n = 1..4, on the tokenized inputs.
    gleu_scores = {}
    for order in range(1, 5):
        gleu_scores["Gleu_{}".format(order)] = gleu.corpus_gleu(
            refrences, hypothesis, min_len=1, max_len=order)

    if testing_mode:
        # NLGEval is given plain sentences, so join tokens with spaces.
        # Build new lists rather than overwriting the caller's data.
        hyp_sentences = [' '.join(tokens) for tokens in hypothesis]

        # Only the first reference of each entry is passed to NLGEval.
        first_refs = [' '.join(ref_group[0]) for ref_group in refrences]
        # Empty references are replaced by the placeholder "no"
        # (presumably because NLGEval cannot score an empty reference
        # string -- TODO confirm).
        first_refs = [sentence if sentence != "" else "no"
                      for sentence in first_refs]

        # NOTE(review): a new NLGEval instance is created on every call;
        # if construction is costly, callers may want to batch their data.
        evaluator = NLGEval()
        scores = evaluator.compute_metrics(ref_list=[first_refs],
                                           hyp_list=hyp_sentences)
    else:
        # BLEU-n with uniform weights 1/n over n-gram orders 1..n.
        scores = {}
        for order in range(1, 5):
            scores["Bleu_{}".format(order)] = bleu_score.corpus_bleu(
                refrences, hypothesis, weights=[1. / order] * order)

    # GLEU entries are appended after the BLEU / NLGEval metrics.
    for key, val in gleu_scores.items():
        scores[key] = val
    return scores