-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathrougescore.py
121 lines (107 loc) · 3.5 KB
/
rougescore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import division
import collections
import six
def _ngrams(words, n):
queue = collections.deque(maxlen=n)
for w in words:
queue.append(w)
if len(queue) == n:
yield tuple(queue)
def _ngram_counts(words, n):
return collections.Counter(_ngrams(words, n))
def _ngram_count(words, n):
return max(len(words) - n + 1, 0)
def _counter_overlap(counter1, counter2):
result = 0
for k, v in six.iteritems(counter1):
result += min(v, counter2[k])
return result
def _safe_divide(numerator, denominator):
if denominator > 0:
return numerator / denominator
else:
return 0
def _safe_f1(matches, recall_total, precision_total, alpha):
recall_score = _safe_divide(matches, recall_total)
precision_score = _safe_divide(matches, precision_total)
denom = (1.0 - alpha) * precision_score + alpha * recall_score
if denom > 0.0:
return (precision_score * recall_score) / denom
else:
return 0.0
def rouge_n(peer, models, n, alpha):
"""
Compute the ROUGE-N score of a peer with respect to one or more models, for
a given value of `n`.
"""
matches = 0
recall_total = 0
peer_counter = _ngram_counts(peer, n)
for model in models:
model_counter = _ngram_counts(model, n)
matches += _counter_overlap(peer_counter, model_counter)
recall_total += _ngram_count(model, n)
precision_total = len(models) * _ngram_count(peer, n)
return _safe_f1(matches, recall_total, precision_total, alpha)
def rouge_1(peer, models, alpha):
"""
Compute the ROUGE-1 (unigram) score of a peer with respect to one or more
models.
"""
return rouge_n(peer, models, 1, alpha)
def rouge_2(peer, models, alpha):
"""
Compute the ROUGE-2 (bigram) score of a peer with respect to one or more
models.
"""
return rouge_n(peer, models, 2, alpha)
def rouge_3(peer, models, alpha):
"""
Compute the ROUGE-3 (trigram) score of a peer with respect to one or more
models.
"""
return rouge_n(peer, models, 3, alpha)
def lcs(a, b):
"""
Compute the length of the longest common subsequence between two sequences.
Time complexity: O(len(a) * len(b))
Space complexity: O(min(len(a), len(b)))
"""
# This is an adaptation of the standard LCS dynamic programming algorithm
# tweaked for lower memory consumption.
# Sequence a is laid out along the rows, b along the columns.
# Minimize number of columns to minimize required memory
if len(a) < len(b):
a, b = b, a
# Sequence b now has the minimum length
# Quit early if one sequence is empty
if len(b) == 0:
return 0
# Use a single buffer to store the counts for the current row, and
# overwrite it on each pass
row = [0] * len(b)
for ai in a:
left = 0
diag = 0
for j, bj in enumerate(b):
up = row[j]
if ai == bj:
value = diag + 1
else:
value = max(left, up)
row[j] = value
left = value
diag = up
# Return the last cell of the last row
return left
def rouge_l(peer, models, alpha):
"""
Compute the ROUGE-L score of a peer with respect to one or more models.
"""
matches = 0
recall_total = 0
for model in models:
matches += lcs(model, peer)
recall_total += len(model)
precision_total = len(models) * len(peer)
return _safe_f1(matches, recall_total, precision_total, alpha)