-
Notifications
You must be signed in to change notification settings - Fork 1
/
eval_tldr.py
31 lines (27 loc) · 1.03 KB
/
eval_tldr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
'''
Evaluate SciTLDR predictions with the dataset's original ROUGE script, which
scores each prediction against multiple references and keeps the maximum.
'''
def eval_res(pred_file, ref_file):
    """Run SciTLDR's ROUGE script on a predictions file and save its output.

    Executes ``scitldr/scripts/cal-rouge.py`` (resolved relative to the
    current working directory) with ``--workers 40`` and writes the script's
    standard output to ``tldr_results.txt``, overwriting any previous file.
    The script is launched with the interpreter that is running this code.

    Args:
        pred_file: Path passed to the script as its first positional argument.
        ref_file: Path passed to the script as its second positional argument.

    Returns:
        None. The script's exit status is not inspected.
    """
    # Function-scope imports keep this block self-contained.
    import subprocess
    import sys

    # An argument list (no shell) hands the paths to the script verbatim, so
    # spaces or shell metacharacters in a path cannot split or alter the
    # command the way an interpolated shell string would.
    cmd = [
        sys.executable,
        "scitldr/scripts/cal-rouge.py",
        str(pred_file),
        str(ref_file),
        "--workers",
        "40",
    ]
    with open("tldr_results.txt", "w") as out:
        # check=False: a failing run is tolerated, matching the earlier
        # os.system call whose return code was discarded.
        subprocess.run(cmd, stdout=out, check=False)
def post_process_zeroshot_REF(fname):
    """Remove ``REF`` placeholders from a text file, one line at a time.

    Reads ``fname`` and writes the cleaned text to
    ``fname + '.postprocessed'``. For every input line:

    * each occurrence of ``'In REF,'`` is deleted;
    * if the line then starts with ``'REF'``, every ``'REF'`` is replaced by
      ``'this paper'``; otherwise every ``'REF'`` is deleted;
    * leading/trailing whitespace is stripped and a newline is appended.

    Args:
        fname: Path of the text file to clean.
    """
    # Both handles live in one ``with`` so the input file is closed as well
    # as the output file; the input is opened first so a missing input does
    # not leave an empty output file behind.
    with open(fname) as src, open(fname + '.postprocessed', 'w') as dst:
        for line in src:
            line = line.replace('In REF,', '')
            # A line that begins with the placeholder refers to the paper
            # itself; anywhere else the placeholder is simply dropped.
            replacement = 'this paper' if line.startswith('REF') else ''
            dst.write(line.replace('REF', replacement).strip() + '\n')
# Driver: clean the zero-shot predictions, score the cleaned copy against the
# SciTLDR-A test references, then echo the '||multi-max' result lines.
pred_file = "output/scitldr/test/test-scitldr-zeroshot/generated_predictions.txt"
ref_file = "scitldr/SciTLDR-Data/SciTLDR-A/test.jsonl"

post_process_zeroshot_REF(pred_file)
pred_file += '.postprocessed'  # evaluate the cleaned copy, not the raw file
eval_res(pred_file, ref_file)

with open('tldr_results.txt') as f:
    for line in f:
        # Skip everything except the lines tagged '||multi-max'.
        if '||multi-max' not in line:
            continue
        print(line.strip())
print()