Skip to content

Commit c93d7d3

Browse files
author
Jin Qiao
committed
added data, results
1 parent d0c2759 commit c93d7d3

File tree

133 files changed

+112302
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

133 files changed

+112302
-0
lines changed

data/genehop.json

Lines changed: 540 additions & 0 deletions
Large diffs are not rendered by default.

data/geneturing.json

Lines changed: 470 additions & 0 deletions
Large diffs are not rendered by default.

evaluate.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
__author__ = 'qiao'
2+
3+
'''
4+
evaluate GeneGPT on all GeneTuring tasks and one GeneHop task (Disease gene location)
5+
'''
6+
7+
import glob
8+
import json
9+
import os
10+
import sys
11+
12+
def get_answer(answer, task):
    '''Normalize a raw prediction string so it can be compared with the gold answer.

    Args:
        answer: Raw prediction text for a single question.
        task: Task name; selects which normalization rule is applied.

    Returns:
        A list of strings for 'Gene disease association' and
        'Disease gene location'; a string for every other task. For the two
        location tasks an empty or whitespace-only prediction yields ''.
    '''
    # Scientific species names mapped to the short names used for the
    # 'Multi-species DNA aligment' task.
    mapper = {
        'Caenorhabditis elegans': 'worm',
        'Homo sapiens': 'human',
        'Danio rerio': 'zebrafish',
        'Mus musculus': 'mouse',
        'Saccharomyces cerevisiae': 'yeast',
        'Rattus norvegicus': 'rat',
        'Gallus gallus': 'chicken',
    }

    answer = answer.strip()

    if task in ('SNP location', 'Gene location'):
        tokens = answer.split()
        if not tokens:
            # An empty prediction has no last token; return '' instead of
            # raising IndexError so one blank output cannot abort a run.
            return ''
        # Only the last whitespace-separated token is kept.
        answer = tokens[-1]
        if 'chr' not in answer:
            answer = 'chr' + answer
        return answer

    # Every remaining task drops the literal 'Answer: ' marker first.
    answer = answer.replace('Answer: ', '')

    if task in ('Gene disease association', 'Disease gene location'):
        # These tasks are scored per item, so return the individual items.
        return answer.split(', ')

    if task == 'Protein-coding genes':
        if answer == 'Yes':
            return 'TRUE'
        if answer == 'No':
            return 'NA'
        return answer

    if task == 'Multi-species DNA aligment':
        # Unknown species names are passed through unchanged.
        return mapper.get(answer, answer)

    return answer
55+
56+
57+
def _load_json(path):
    '''Parse the JSON file at path, closing the file handle afterwards.'''
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


if __name__ == '__main__':
    # Usage: <script> <results_folder>, where the folder holds one JSON file
    # of predictions per task, named after the task.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: python {sys.argv[0]} <results_folder>')

    # Gold answers keyed by task name; one GeneHop task is merged in.
    qas = _load_json('data/geneturing.json')
    qas['Disease gene location'] = _load_json('data/genehop.json')['Disease gene location']

    folder = sys.argv[1]

    for path in glob.glob(os.path.join(folder, '*')):
        print(f'\nEvaluating {path}')
        preds = _load_json(path)
        # The task name is the file name without its '.json' suffix.
        task = os.path.basename(path).replace('.json', '')

        if task not in qas:
            print(f'{task} is not automatically evaluated.')
            continue

        info = qas[task]

        # entry[0] is used as the question key and entry[2] as the raw
        # prediction; the remaining fields are not read here.
        pred_q2a = {entry[0]: get_answer(entry[2], task) for entry in preds}

        correct = []

        for question, answer in info.items():
            if task == 'Gene disease association':
                # Gold answer is a ', '-separated string; score is the
                # fraction of gold items found in the predicted list.
                answer = answer.split(', ')
                answer_in = [ans in pred_q2a[question] for ans in answer]
                correct.append(sum(answer_in) / len(answer_in))

            elif task == 'Disease gene location':
                # Gold answer is iterated as-is (presumably already a list
                # in genehop.json -- confirm against the data file).
                answer_in = [ans in pred_q2a[question] for ans in answer]
                correct.append(sum(answer_in) / len(answer_in))

            elif task == 'Human genome DNA aligment':
                pred = pred_q2a[question]
                pred_chr = pred.split(':')[0]
                answer_chr = answer.split(':')[0]

                # Full credit for an exact match, half credit when only the
                # part before the first ':' matches.
                if pred == answer:
                    correct.append(1)
                elif pred_chr == answer_chr:
                    correct.append(0.5)
                else:
                    correct.append(0)

            else:
                correct.append(1 if pred_q2a[question] == answer else 0)

        if not correct:
            # No gold questions for this task: avoid dividing by zero.
            print(f'No questions to evaluate for {task}.')
            continue

        print(sum(correct) / len(correct))

genehop_results/genehop_blast_111011/sequence gene alias.json

Lines changed: 1888 additions & 0 deletions
Large diffs are not rendered by default.

genehop_results/genehop_eutils_101110/Disease gene location.json

Lines changed: 1384 additions & 0 deletions
Large diffs are not rendered by default.

genehop_results/genehop_eutils_101110/SNP gene function.json

Lines changed: 960 additions & 0 deletions
Large diffs are not rendered by default.

geneturing_results/000001/Gene SNP association.json

Lines changed: 788 additions & 0 deletions
Large diffs are not rendered by default.

geneturing_results/000001/Gene alias.json

Lines changed: 916 additions & 0 deletions
Large diffs are not rendered by default.

geneturing_results/000001/Gene disease association.json

Lines changed: 808 additions & 0 deletions
Large diffs are not rendered by default.

geneturing_results/000001/Gene location.json

Lines changed: 912 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)