-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
59 lines (48 loc) · 1.56 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding:utf-8 -*-
import os
import json
import zlib
import jinja2
import config
import addlibdir
import eUtils
import tfidf
# Globals.
# Directory that contains this module file.
BASE_DIR = os.path.dirname(__file__)
# The 'templates' sub-directory of BASE_DIR.
TEMPLATE_DIR = os.path.join(BASE_DIR, 'templates')
# Shared Jinja2 environment that loads templates from TEMPLATE_DIR;
# render() looks templates up through it.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.FileSystemLoader(TEMPLATE_DIR))
def render(template_name, template_vals=None):
    """Render the named template and return the rendered output.

    Args:
        template_name: name of the template, looked up through JINJA_ENV.
        template_vals: optional mapping of values handed to the template.
            When omitted, the template is rendered with an empty mapping.

    Returns:
        Whatever the template's render() call produces.
    """
    # A fresh dict per call instead of a mutable default shared by all calls.
    if template_vals is None:
        template_vals = {}
    template = JINJA_ENV.get_template(template_name)
    return template.render(template_vals)
def decrypt(entity, field):
    """Return the decoded value of a zlib-compressed JSON attribute.

    Reads attribute `field` from `entity`, inflates it with zlib and
    parses the result as JSON.
    """
    compressed = getattr(entity, field)
    serialized = zlib.decompress(compressed)
    return json.loads(serialized)
class TermException(Exception):
    """Raised when a search term contains a forbidden substring.

    The offending substring is passed as the exception's only argument.
    """


def try_to_update_term(data, term):
    """Normalize `term`, try to build a micro-corpus for it, and save `data`.

    The term has newlines removed, spaces replaced by '+', and is
    upper-cased. If abstracts can be fetched for it through eUtils, they
    are preprocessed with tfidf, serialized as zlib-compressed JSON and
    stored in `data.mu_corpus`. In every non-raising case `data.term` and
    `data.term_valid` are updated and `data.put()` is called.

    Args:
        data: object exposing `mu_corpus`, `term_valid` and `term`
            attributes and a `put()` method.
        term: the raw search term string.

    Returns:
        True if the micro-corpus was built, False if eUtils raised
        PubMedException or NoHitException.

    Raises:
        TermException: if the normalized term contains a forbidden
            substring; `data` is left untouched in that case.
    """
    # Spaces cause eUtils queries to fail.
    term = term.replace('\n', '').replace(' ', '+').upper()

    # Minimal check for term inconsistencies.
    for forbidden in ('/', ' ', 'CRDT', 'CRDAT'):
        if forbidden in term:
            raise TermException(forbidden)

    try:
        # If we can create the micro-corpus with the new term,
        # then do the update. Otherwise something went wrong.
        abstr_sample = eUtils.fetch_abstr(
            term=term,
            retmax=config.RETMAX,
            email=config.ADMAIL
        )
        mu_corpus = {
            abstr['pmid']: tfidf.preprocess(abstr['text'])
            for abstr in abstr_sample
        }
        # zlib needs bytes; encoding is a no-op for the ASCII output of
        # json.dumps on Python 2 and required on Python 3.
        data.mu_corpus = zlib.compress(json.dumps(mu_corpus).encode('utf-8'))
    except (eUtils.PubMedException, eUtils.NoHitException):
        # PubMed error or no hit.
        success = False
    else:
        success = True

    # The term is stored even when invalid, flagged through term_valid.
    data.term_valid = success
    data.term = term
    data.put()
    return success