-
Notifications
You must be signed in to change notification settings - Fork 3
/
client.py
346 lines (290 loc) · 15.2 KB
/
client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
"""
An example application which integrates 23andMe data with data from BRCA
Exchange, via the GA4GH API.

Notes on what was done and what we plan to do (TODOs):
- Don't hardcode the demo user
- Document the intersection algorithm
"""
import csv
import getpass
import json
import logging
import os
import pickle
import re
import sys
from optparse import OptionParser

import flask
import google.protobuf.json_format as json_format
import requests
from flask import request
from ga4gh.exceptions import RequestNonSuccessException
from ga4gh_client import client as g4client
from requests_oauthlib import OAuth2Session
from wtforms import Form
from wtforms.fields import (
    IntegerField,
    SelectField,
    SelectMultipleField,
    SubmitField,
)
# ---------------------------------------------------------------------------
# Module-level configuration and command-line parsing.
# Everything below runs at import time, including parser.parse_args().
# ---------------------------------------------------------------------------
# Local port the Flask app listens on; also baked into the redirect URIs.
PORT = 5000
# Default 23andMe API host (overridable with -a / --23andMe-api-server).
API_SERVER = 'api.23andme.com'
BASE_CLIENT_URL = 'http://localhost:%s/' % PORT
# OAuth redirect targets: the /app/ view and the /variants/search/ endpoint.
DEFAULT_APP_REDIRECT_URI = '%sapp/' % BASE_CLIENT_URL
DEFAULT_API_REDIRECT_URI = '%svariants/search/' % BASE_CLIENT_URL
PAGE_HEADER = "farvan: 23andMe + BRCA Exchange (via GA4GH)"
# Reference (chromosome) names queried against the GA4GH server.
REFERENCE_NAMES = ["13", "17"]
# Every reference name offered as a choice in the application form.
REFERENCE_NAMES_ALL = [str(x) for x in range(1, 23)] + ['X', 'Y', 'MT']
# Default start coordinate for GA4GH variant searches.
BRCA2_START = 32889611
# Cached 23andMe OAuth access token; filled in lazily by the request handlers.
access_token = None
# So we don't get errors if the redirect uri is not https.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = '1'
# Pass in more scopes through the command line, or change these.
DEFAULT_SNPS = ['rs12913832', 'rs3088053', 'rs1000068', 'rs206118', 'rs206115', 'rs3094315', 'i3000001']
DEFAULT_SCOPES = ['names', 'basic', 'email', 'analyses', 'genomes'] + DEFAULT_SNPS
# The program will ask for a client_secret if you choose to not hardcode one
# here.
client_secret = None
# Command-line interface. Only -i/--client-id is flagged as required in the
# usage string; the remaining options fall back to the defaults above.
parser = OptionParser(usage="usage: %prog -i CLIENT_ID [options]")
parser.add_option("-i", "--client-id", dest="client_id", default='',
help="Your client_id [REQUIRED]")
parser.add_option('-s', '--scopes', dest='scopes', action='append', default=[],
help='Your requested scopes. Eg: -s basic -s rs12913832')
parser.add_option("-c", "--client-secret", dest='client_secret',
help='The client secret')
parser.add_option("-r", "--app-redirect-uri", dest="app_redirect_uri", default=DEFAULT_APP_REDIRECT_URI, help="Your client's redirect_uri [%s]" % DEFAULT_APP_REDIRECT_URI)
parser.add_option("-z", "--api-redirect-uri", dest="api_redirect_uri", default=DEFAULT_API_REDIRECT_URI, help="Your client's redirect_uri [%s]" % DEFAULT_API_REDIRECT_URI)
parser.add_option("-a", "--23andMe-api-server", dest="t23andMe_api_server", default=API_SERVER, help="Almost always: [api.23andme.com]")
parser.add_option("-p", "--select-profile", dest='select_profile', action='store_true', default=False, help='If present, the auth screen will show a profile select screen')
parser.add_option("-f", "--ga4gh-api-server", dest="ga4gh_api_server", help="The GA4GH API server location.")
parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False,
help="Whether or not to provide debugging output.")
parser.add_option("-k", "--snps-data", dest='snps_data',
help='A SNPS data file to use.')
# Parse sys.argv immediately; `options` is consumed by the setup code below.
(options, args) = parser.parse_args()
def _23andMe_queries(client_id, client_secret, app_redirect_uri):
    """Query the 23andMe demo endpoints and hand back the raw responses.

    The first call exchanges the authorization response carried by the
    current Flask request URL for an OAuth2 access token and caches it in
    the module-level ``access_token``; later calls reuse the cached token.

    Returns a 7-tuple ``(genotype_responses, user_response, names_response,
    None, None, None, None)`` where ``genotype_responses`` holds one
    ``requests`` response per demo profile. The four trailing slots are
    always ``None``.
    """
    global access_token
    if not access_token:
        oauth_session = OAuth2Session(client_id, redirect_uri=app_redirect_uri)
        token = oauth_session.fetch_token(
            API_TOKEN_URL,
            client_secret=client_secret,
            authorization_response=request.url,
        )
        access_token = token['access_token']
    auth_headers = {'Authorization': 'Bearer %s' % access_token}

    def fetch(path, **extra):
        # Every call shares the bearer header and TLS verification.
        return requests.get("%s%s" % (BASE_API_URL, path),
                            headers=auth_headers, verify=True, **extra)

    # The documentation says you shouldn't call 'names' if the user wants to
    # be anonymous. We should check on this. It is only used here to
    # enumerate the demo profiles.
    names_response = fetch("/1/demo/names/")

    genotype_query = {'locations': ' '.join(locations), 'format': 'embedded'}
    genotype_responses = [
        fetch("/1/demo/genotypes/%s/" % profile['id'], params=genotype_query)
        for profile in names_response.json()['profiles']
    ]

    user_response = fetch("/1/demo/user/")
    return (genotype_responses, user_response, names_response,
            None, None, None, None)
def _g4_queries(start=BRCA2_START, end=BRCA2_START+2000):
    """Search the GA4GH server for variants between ``start`` and ``end``.

    Every dataset on the server is visited; within each, the ``brca-hg37``
    variant set is searched once per name in ``REFERENCE_NAMES``.

    Args:
        start: start coordinate passed to ``search_variants``.
        end: end coordinate passed to ``search_variants``.

    Returns:
        A tuple ``(datasets, variant_sets, results)``. ``variant_sets`` is
        the variant-set list of the last dataset visited (empty when the
        server reports no datasets). ``results`` is a list of tuples
        ``(reference_name, start, end, reference_bases, alternate_bases,
        id, info, names)``, one per variant found.
    """
    if DEBUG:
        http_client = g4client.HttpClient(API_SERVER_GA4GH,
                                          logLevel=logging.DEBUG)
    else:
        http_client = g4client.HttpClient(API_SERVER_GA4GH)

    # There is currently only 1 dataset available in BRCA, but we'll be robust
    # and iterate as if there were more.
    datasets = list(http_client.search_datasets())
    # Bound up front so the return below is safe when there are no datasets.
    variant_sets = []
    results = []
    for dataset in datasets:
        # There should be 3 variant sets; we're only concerned with hg37 for
        # now though.
        variant_sets = list(
            http_client.search_variant_sets(dataset_id=dataset.id))
        # next() with a default works on Python 2 and 3 and, unlike
        # filter(...)[0], does not blow up when the set is missing.
        variant_set = next(
            (vs for vs in variant_sets if vs.id == 'brca-hg37'), None)
        if variant_set is None:
            logging.warning(
                "Dataset %s has no 'brca-hg37' variant set; skipping.",
                dataset.id)
            continue
        for reference_name in REFERENCE_NAMES:
            found = http_client.search_variants(
                variant_set_id=variant_set.id, reference_name=reference_name,
                start=start, end=end)
            for variant in found:
                results.append((
                    variant.reference_name, variant.start, variant.end,
                    variant.reference_bases, variant.alternate_bases,
                    variant.id, variant.info, variant.names,
                ))
    return (datasets, variant_sets, results)
def _compute_locations_from_snps_file(start=41196311, end=41196314, reference_name="13", s=""):
"""Computes a more reasonable list of SNPs than DEFAULT_SNPS.
It returns the rsIDs from the given SNPs file that are associated with the
given reference name, and has a position that falls within the given
range."""
cross = []
cross_augmented = []
found_header = False
with open(s, 'r') as fh:
reader = csv.reader(fh, delimiter='\t')
for row in reader:
if not row[0].startswith('#'):
if not found_header:
found_header = True
continue
index, snp, ch, p = row[0], row[1], row[2], int(row[3])
if ch == reference_name and p > start and p < end:
cross.append(snp)
cross_augmented.append((snp, p))
print "Crosses: %s" % len(cross)
#return cross if len(cross) > 0 else ' '.join(DEFAULT_SNPS)
#return cross + ' '.join(DEFAULT_SNPS) + scopes
#return " ".join([x[0] for x in cross])
return cross, cross_augmented
# ---------------------------------------------------------------------------
# Resolve runtime settings from the parsed command-line options.
# ---------------------------------------------------------------------------
# The SNPs data file is mandatory: it drives both the requested scopes and
# the genotype locations queried later.
if not options.snps_data:
    print("Should specify --snps-data option.")
    sys.exit(1)
SNPS_DATA_FILE = options.snps_data

# SNPs from the data file that fall inside the default query window.
# Other windows tried during development: 41196311-41277500 and
# 41196311-41196314.
locations, locations_augmented = _compute_locations_from_snps_file(
    start=BRCA2_START, end=BRCA2_START + 2000, s=SNPS_DATA_FILE)
print("Locations: %s %s" % (len(locations), locations))

# Explicit -s/--scopes win; otherwise request the defaults plus every SNP
# selected above.
scopes = options.scopes or (DEFAULT_SCOPES + locations)
print("Scopes: %s %s" % (len(scopes), scopes))

DEBUG = options.debug
BASE_API_URL = "https://%s" % options.t23andMe_api_server
API_AUTH_URL = '%s/authorize' % BASE_API_URL
API_TOKEN_URL = '%s/token/' % BASE_API_URL
API_SERVER_GA4GH = options.ga4gh_api_server
if options.select_profile:
    API_AUTH_URL += '?select_profile=true'

app_redirect_uri = options.app_redirect_uri
api_redirect_uri = options.api_redirect_uri
client_id = options.client_id
if options.client_secret:
    client_secret = options.client_secret

if not options.client_id:
    print("missing param: CLIENT_ID:")
    parser.print_usage()
    print("Please navigate to your developer dashboard [%s/dev/] to retrieve your client_id.\n" % BASE_API_URL)
    # A missing required option is an error; report it through the exit code.
    sys.exit(1)

if not client_secret:
    print("Please navigate to your developer dashboard [%s/dev/] to retrieve your client_secret." % BASE_API_URL)
    client_secret = getpass.getpass("Please enter your client_secret: ")

app = flask.Flask(__name__)
# flask.session is written by the /variants/search/ endpoint, and Flask
# sessions require a secret key.
# NOTE(review): 'abc' is a hard-coded placeholder; replace it with a real
# secret before using this outside a local demo.
app.config['SECRET_KEY'] = 'abc'
class ApplicationForm(Form):
    """Form posted to the /app/ view: chromosome(s) plus a coordinate range."""

    # WTForms choices are (value, label) pairs. A bare string such as "13"
    # would be unpacked into value "1" / label "3" by older WTForms releases,
    # so each reference name is paired with itself.
    # NOTE(review): "13" is passed as the field *label* here; if it was meant
    # as the default selection it should be `default=["13"]` - confirm.
    chromosome = SelectMultipleField(
        "13", choices=list(zip(REFERENCE_NAMES_ALL, REFERENCE_NAMES_ALL)))
    # Query range bounds.
    start = IntegerField()
    end = IntegerField()
    # The /app/ handler looks up a 'submit' field on this form.
    submit = SubmitField()
@app.route('/')
def index():
    """Landing page: build the 23andMe authorization link and render it.

    Users are meant to authenticate here before moving on to the app;
    there should be no way of reaching the app without this step.
    """
    oauth_session = OAuth2Session(
        client_id, redirect_uri=app_redirect_uri, scope=scopes)
    # authorization_url() also returns an OAuth state value, which this view
    # does not use.
    auth_url, _state = oauth_session.authorization_url(API_AUTH_URL)
    context = {
        'auth_url': auth_url,
        'page_header': PAGE_HEADER,
        'page_title': PAGE_HEADER,
        'client_id': client_id,
    }
    return flask.render_template('index.html', **context)
@app.route('/variants/search/')
def variants_search_endpoint():
    """Implements the variants_search API endpoint, while incorporating 23andMe
    results, if available.

    Query parameters (all optional):
        start: range start, defaults to ``BRCA2_START``.
        end: range end, defaults to ``BRCA2_START + 1000``.
        reference_name: chromosome name, defaults to ``"13"``.
        code: OAuth authorization code; stored in the session as ``23code``.

    Returns a JSON document with keys ``23andme`` (one decoded genotype
    payload per demo profile), ``g4`` (GA4GH variants as JSON objects),
    ``locations`` (SNP ids from the SNPs file inside the range) and
    ``query`` (the effective query parameters).
    """
    global access_token

    # Grab some arguments.
    flask.session['23code'] = flask.request.args.get('code')
    start = int(flask.request.args.get('start', BRCA2_START))
    end = int(flask.request.args.get('end', BRCA2_START + 1000))
    reference_name = str(flask.request.args.get('reference_name', "13"))

    # Perform GA4GH query.
    g4 = g4client.HttpClient(API_SERVER_GA4GH)
    variants = list(g4.search_variants(
        variant_set_id="brca-hg37", start=start, end=end,
        reference_name=reference_name))

    locations, _ = _compute_locations_from_snps_file(
        start=start, end=end, reference_name=reference_name,
        s=SNPS_DATA_FILE)

    # Fetch (and cache) the 23andMe access token on first use.
    if not access_token:
        ttam_oauth = OAuth2Session(client_id, redirect_uri=api_redirect_uri)
        token_dict = ttam_oauth.fetch_token(
            API_TOKEN_URL, client_secret=client_secret,
            authorization_response=request.url)
        access_token = token_dict['access_token']
    headers = {'Authorization': 'Bearer %s' % access_token}

    # The demo profiles have to be looked up first; the original code
    # iterated an undefined `names_response` here.
    names_response = requests.get(
        "%s%s" % (BASE_API_URL, "/1/demo/names/"),
        headers=headers, verify=True)

    # Same space-separated `locations` format that _23andMe_queries sends.
    genotype_params = {'locations': ' '.join(locations), 'format': 'embedded'}
    genotype_responses = []
    for profile in names_response.json()['profiles']:
        genotype_response = requests.get(
            "%s%s" % (BASE_API_URL, "/1/demo/genotypes/%s/" % profile['id']),
            params=genotype_params, headers=headers, verify=True)
        genotype_responses.append(genotype_response)

    return flask.jsonify({
        # Each response is decoded on its own; the list has no .json().
        "23andme": [gr.json() for gr in genotype_responses],
        # NOTE(review): _MessageToJsonObject is a private protobuf helper and
        # may not exist in every protobuf release - confirm before upgrading.
        "g4": [json_format._MessageToJsonObject(v, True) for v in variants],
        "locations": locations,
        "query": {"start": start, "end": end,
                  "reference_name": reference_name},
    })
def _match_calls_to_variants(profile, variants, augmented_locations):
    """Pair one profile's genotype calls with GA4GH variants at the same SNP.

    ``profile`` is a decoded genotype payload, ``variants`` the GA4GH
    variants for one reference name, and ``augmented_locations`` a list of
    ``(snp, position)`` tuples. A call matches when its ``location`` equals
    a SNP id whose position equals a variant's ``start``.

    Returns a list of ``(description, allele_frequency, call_text)`` tuples;
    empty when the payload has no ``genotypes`` entry.
    """
    if 'genotypes' not in profile:
        return []

    # Index variants by start once instead of rescanning them per call.
    variants_by_start = {}
    for variant in variants:
        variants_by_start.setdefault(variant.start, []).append(variant)

    matches = []
    for call in profile['genotypes']:
        for snp, position in augmented_locations:
            if call['location'] != snp:
                continue
            for variant in variants_by_start.get(position, ()):
                # TODO: We _should_ iterate over every allele frequency
                # entry, but for now just use the first one.
                frequencies = variant.info["Allele_Frequency"]
                if len(frequencies) > 0:
                    frequency = frequencies[0]
                else:
                    frequency = "(Nothing about allele frequency)"
                matches.append((
                    "Both have {}".format(snp),
                    frequency,
                    "Individual presented: " + call['call'],
                ))
    return matches


@app.route('/app/', methods=['GET', 'POST'])
def application():
    """Represents our application, which makes use of 2 APIs: 23andMe, and BRCA
    Exchange (via GA4GH).

    Fetches the 23andMe demo data, queries the ``brca-hg37`` variant set for
    each name in ``REFERENCE_NAMES``, and renders ``app.html`` with the
    intersection. The template receives ``genotype_responses`` as a list of
    4-tuples ``(success, reference_name, profile_json, matches)``: ``success``
    tells whether that genotype request returned HTTP 200, and ``matches`` is
    the list built by ``_match_calls_to_variants``.
    """
    responses = _23andMe_queries(client_id, client_secret, app_redirect_uri)
    # Only the first three of the seven slots are populated.
    genotype_responses, user_response, names_response = responses[:3]

    # Decode each genotype response once; it is reused for every reference.
    profiles = [(gr.status_code == 200, gr.json()) for gr in genotype_responses]

    g4 = g4client.HttpClient(API_SERVER_GA4GH)
    results = []
    for reference_name in REFERENCE_NAMES:
        variants = list(g4.search_variants(
            variant_set_id="brca-hg37", start=BRCA2_START,
            end=BRCA2_START + 1000, reference_name=reference_name))
        for succeeded, profile in profiles:
            matches = _match_calls_to_variants(
                profile, variants, locations_augmented)
            results.append((succeeded, reference_name, profile, matches))

    if request.method == 'POST':
        form = ApplicationForm(request.form)
        if form.validate():
            # TODO: the submitted chromosome/range is validated but not yet
            # applied to the queries above, which use the default window.
            logging.debug(
                "Application form submitted: chromosome=%s start=%s end=%s",
                form.chromosome.data, form.start.data, form.end.data)

    names_json = names_response.json()
    return flask.render_template(
        'app.html', page_header=PAGE_HEADER,
        genotype_responses=results,
        home_url=BASE_CLIENT_URL, user_response_json=user_response.json(),
        names_response_json=names_json, page_title=PAGE_HEADER,
        client_id=client_id,
        # The OAuth redirect back to this view carries the code in its query.
        code=request.args.get('code'),
        user_request_success=user_response.status_code == 200,
        names_request_success=names_response.status_code == 200,
        api_results_url='%svariants/search' % BASE_CLIENT_URL,
        # NOTE(review): assumes the names payload carries first_name and
        # last_name; .get() yields None otherwise - confirm against the API.
        account_first_name=names_json.get('first_name'),
        account_last_name=names_json.get('last_name'))
def _format_g4results(g):
o = []
for r in g:
r = (r.id, r.names, r.reference_bases, r.reference_name, r.start, r.end, r.calls, r.info['Hg37_Start'].values[0].number_value, r.info['Hg37_End'].values[0].number_value, r.info['AFR_Allele_frequency_1000_Genomes'].values[0].string_value, r.info['EUR_Allele_frequency_1000_Genomes'].values[0].string_value, r.info['Chr'].values[0].string_value, r.info['Pathogenicity_expert'].values[0].string_value, r.info['Ref'].values[0].string_value, r.info['Alt'].values[0].string_value, r.info['Pos'].values[0].string_value, r.info['Allele_Frequency'].values[0].string_value, r.info['Gene_Symbol'].values[0].string_value)
o.append(r)
return o
#g4results = _format_g4results(g4results)
# Start the Flask development server only when this file is run as a script,
# using the debug flag and port configured above.
if __name__ == '__main__':
    app.run(debug=DEBUG, port=PORT)