Skip to content

Commit 3091c35

Browse files
committed
Include JSON parsing, add galindia.ipynb
1 parent 078dadf commit 3091c35

File tree

6 files changed

+2040
-287
lines changed

6 files changed

+2040
-287
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
.DS_Store
2+
.pyc

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,17 @@ for k in myTree.Objects:
7979

8080
--------------------
8181

82+
## galindia
83+
![](figures/coa_galindia.png)
84+
85+
[`galindia.ipynb`](galindia.ipynb) is a work in progress that uses baltic to plot JSONs from [nextstrain.org](nextstrain.org) in order to allow customisable, static, publication-ready figures for phylogenies coming from nextstrain's augur pipeline.
86+
87+
--------------------
8288
## baltic was used in the following publications:
89+
- Dudas G, Carvalho L, Rambaut A, Bedford T. _MERS-CoV spillover at the camel-human interface_, 2017. __bioRxiv__ 173211; doi: https://doi.org/10.1101/173211.
8390
- Grubaugh ND, Ladner JT, Kraemer MUG, Dudas G, Tan AL, Gangavarapu K, Wiley MR, White S, Thézé J, ..., Bedford T, Pybus OG, Isern S, Palacios G, Andersen KG. _Multiple introductions of Zika virus into the United States revealed through genomic epidemiology_, 2017. __Nature__ 546:401–405.
8491
- Dudas G, Carvalho LM, Bedford T, Tatem AJ, ..., Suchard M, Lemey P, Rambaut A. _Virus genomes reveal the factors that spread and sustained the West African Ebola epidemic_, 2017. __Nature__ 544(7650): 309-315.
85-
- Bell SM, Bedford T. _Modern-Day SIV viral diversity generated by extensive recombination and cross-species transmission_, 2017. __bioRxiv__ 101725; doi: https://doi.org/10.1101/101725
92+
- Bell SM, Bedford T. _Modern-Day SIV viral diversity generated by extensive recombination and cross-species transmission_, 2017. __PLoS Pathog__ 13(7): e1006466.
8693
- Holmes EC, Dudas G, Rambaut A, Andersen KG. _The Evolution of Ebola virus: Insights from the 2013-2016 Epidemic_, 2016. __Nature__ 538(7624): 193-200.
8794
- Whitmer SLM, Albariño C, Shepard SS, Dudas G, ..., Nichol ST, Ströher U. _Preliminary Evaluation of the Effect of Investigational Ebola Virus Disease Treatments on Viral Genome Sequences_, 2016. __Journal of Infectious Diseases__:jiw177.
8895
- Rambaut A, Dudas G, Carvalho LM, Park DJ, Yozwiak NL, Holmes EC, Andersen KG. _Comment on “Mutation rate and genotype variation of Ebola virus from Mali case sequences”_, 2016. __Science__ 353(6300):658-658.

austechia.ipynb

Lines changed: 248 additions & 264 deletions
Large diffs are not rendered by default.

baltic.py

Lines changed: 136 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import copy
33
import math
44
import datetime as dt
5+
import json
56

67
def unique(o, idfun=repr):
78
"""Reduce a list down to its unique elements."""
@@ -23,6 +24,10 @@ def decimalDate(date,fmt="%Y-%m-%d",variable=False,dateSplitter='-'):
2324
eoy = dt.datetime(year + 1, 1, 1) ## get beginning of next year
2425
return year + ((adatetime - boy).total_seconds() / ((eoy - boy).total_seconds())) ## return fractional year
2526

27+
def convertDate(x,start,end):
28+
""" Converts calendar dates between given formats """
29+
return dt.datetime.strftime(dt.datetime.strptime(x,start),end)
30+
2631
class clade: ## clade class
2732
def __init__(self,givenName):
2833
self.branchType='leaf' ## clade class poses as a leaf
@@ -158,7 +163,7 @@ def singleType(self):
158163

159164
grandparent.children.append(child) ## add child to grandparent's children
160165
grandparent.children.remove(k) ## remove old parent from grandparent's children
161-
166+
grandparent.children=list(set(grandparent.children))
162167
child.length+=k.length ## adjust child length
163168

164169
multiTypeNodes.remove(k) ## remove old parent from multitype nodes
@@ -177,7 +182,7 @@ def treeStats(self):
177182
obs=self.Objects ## convenient list of all objects in the tree
178183
print '\nTree height: %.6f\nTree length: %.6f'%(self.treeHeight,sum([x.length for x in obs])) ## report the height and length of tree
179184

180-
nodes=self.nodes ## get all nodes
185+
nodes=[k for k in obs if k.branchType=='node'] ## get all nodes
181186
strictlyBifurcating=False ## assume tree is not strictly bifurcating
182187
multiType=False
183188
singleton=False
@@ -247,11 +252,11 @@ def traverse_tree(self,startNode=None,include_all=False,verbose=False):
247252
root=False ## root becomes true once you're unable to go back any further
248253

249254
while root==False: ## cycle indefinitely as long as there's more of the tree to explore
250-
if seen.count(cur_node.index)>3:
251-
if verbose==True:
252-
print 'node %s seen too many times, breaking'%(cur_node.index)
253-
root=True
254-
break
255+
# if seen.count(cur_node.index)>3:
256+
# if verbose==True:
257+
# print 'node %s seen too many times, breaking'%(cur_node.index)
258+
# root=True
259+
# break
255260

256261
if cur_node.branchType=='node': ## if currently dealing with a node
257262
if verbose==True:
@@ -349,10 +354,15 @@ def sortBranches(self,descending=True):
349354
self.nodes=[k for k in self.Objects if k.branchType=='node']
350355
self.drawTree() ## update x and y positions of each branch, since y positions will have changed because of sorting
351356

352-
def drawTree(self,order=None):
357+
def drawTree(self,order=None,verbose=False):
353358
""" Find x and y coordinates of each branch. """
354359
if order==None:
355360
order=[x for x in self.traverse_tree() if x.branchType=='leaf'] ## order is a list of tips recovered from a tree traversal to make sure they're plotted in the correct order along the vertical tree dimension
361+
if verbose==True:
362+
print 'Drawing tree in pre-order'
363+
else:
364+
if verbose==True:
365+
print 'Drawing tree with provided order'
356366

357367
name_order=[x.numName for x in order]
358368
skips=[1 if isinstance(x,leaf) else x.width+1 for x in order]
@@ -364,8 +374,12 @@ def drawTree(self,order=None):
364374
storePlotted=0
365375
drawn=[] ## drawn keeps track of what's been drawn
366376
while len(drawn)!=len(self.Objects): # keep drawing the tree until everything is drawn
377+
if verbose==True:
378+
print 'Drawing iteration %d'%(len(drawn))
367379
for k in [x for x in self.Objects if x.index not in drawn]: ## iterate through objects that have not been drawn
368380
if k.branchType=='leaf': ## if leaf - get position of leaf, draw branch connecting tip to parent node
381+
if verbose==True:
382+
print 'Setting leaf %s coordinates'%(k.index)
369383
x=k.height ## x position is height
370384
y_idx=name_order.index(k.numName) ## y position of leaf is given by the order in which tips were visited during the traversal
371385
y=sum(skips[y_idx:]) ## sum across skips to find y position
@@ -377,6 +391,8 @@ def drawTree(self,order=None):
377391

378392
if k.branchType=='node': ## if parent is non-root node and y positions of all its children are known
379393
if len([q.y for q in k.children if q.y!=None])==len(k.children):
394+
if verbose==True:
395+
print 'Setting node %s coordinates'%(k.index)
380396
x=k.height ## x position is height
381397
children_y_coords=[q.y for q in k.children if q.y!=None] ## get all existing y coordinates of the node
382398
y=sum(children_y_coords)/float(len(children_y_coords)) ## internal branch is in the middle of the vertical bar
@@ -500,7 +516,7 @@ def traverseWithinTrait(self,startNode,traitName,converterDict=None):
500516
collected.append(cur_node)
501517
cur_node=cur_node.parent
502518

503-
def commonAncestor(self,descendants,numName=False):
519+
def commonAncestor(self,descendants,numName=False,strict=False):
504520
types=[desc.__class__ for desc in descendants]
505521
assert len(set(types))==1,'More than one type of data detected in descendants list'
506522
if numName==False:
@@ -513,7 +529,13 @@ def commonAncestor(self,descendants,numName=False):
513529
ancestor=[k for k in allAncestors if sum([[self.tipMap[w] for w in k.leaves].count(l) for l in descendants])==len(descendants)][-1]
514530
else:
515531
ancestor=[k for k in allAncestors if sum([[w for w in k.leaves].count(l) for l in descendants])==len(descendants)][-1]
516-
return ancestor
532+
533+
if strict==False:
534+
return ancestor
535+
elif strict==True and len(ancestor.leaves)==len(descendants):
536+
return ancestor
537+
elif strict==True and len(ancestor.leaves)>len(descendants):
538+
return None
517539

518540
def collapseSubtree(self,cl,givenName,verbose=False,widthFunction=lambda x:x):
519541
""" Collapse an entire subtree into a clade object. """
@@ -583,7 +605,7 @@ def get_value(ob,tr):
583605
return ob.traits[tr]
584606
if verbose==True:
585607
print 'Collapsing based on trait'
586-
nodes_to_delete=[n for n in newTree.nodes if n.traits.has_key(trait) and f(get_value(n,trait))==True] ## fetch a list of all nodes who are not the root and who satisfy the condition
608+
nodes_to_delete=[n for n in newTree.nodes if f(get_value(n,trait))==True if n!=newTree.root] ## fetch a list of all nodes who are not the root and who satisfy the condition
587609
else:
588610
assert [w.branchType for w in designated_nodes].count('node')==len(designated_nodes),'Non-node class detected in list of nodes designated for deletion'
589611
assert len([w for w in designated_nodes if w.parent.index=='Root'])==0,'Root node was designated for deletion'
@@ -599,6 +621,8 @@ def get_value(ob,tr):
599621
k.parent.children+=zero_node ## add them to the zero node's parent
600622
old_parent=k ## node to be deleted is the old parent
601623
new_parent=k.parent ## once node is deleted, the parent to all their children will be the parent of the deleted node
624+
if new_parent==None:
625+
new_parent=self.root
602626
if verbose==True:
603627
print 'Removing node %s, attaching children %s to node %s'%(old_parent.index,[w.index for w in k.children],new_parent.index)
604628
for w in newTree.Objects: ## assign the parent of deleted node as the parent to any children of deleted node
@@ -624,12 +648,17 @@ def toString(self,traits=[],numName=False,verbose=False,nexus=False):
624648
root=False
625649

626650
while root==False:
651+
if verbose==True:
652+
print 'Branch this iteration: %s'%(cur_node.index)
627653
if cur_node.branchType=='node':
628654
if verbose==True:
629655
print 'Encountered node %s'%(cur_node.index)
630656
## if all children have been seen and not at root
631-
while sum([1 if x.index in seen else 0 for x in cur_node.children])==len(cur_node.children) and cur_node.index!='Root':
632-
if cur_node.parent.index=='Root':
657+
while sum([1 if x.index in seen else 0 for x in cur_node.children])==len(cur_node.children):
658+
if verbose==True:
659+
print 'Recursing downwards, currently at %s'%(cur_node.index)
660+
print '%s %s'%(len(set(seen)),len(cur_node.leaves))
661+
if cur_node.index=='Root':
633662
root=True
634663
break
635664
else:
@@ -727,12 +756,15 @@ def toString(self,traits=[],numName=False,verbose=False,nexus=False):
727756
elif cur_node.branchType=='leaf':
728757
if verbose==True:
729758
print 'Encountered leaf %s'%(cur_node.index)
759+
730760
seen.append(cur_node.index)
731761
if cur_node.parent.index=='Root':
732762
root=True
733763
break
734764
else:
735765
cur_node=cur_node.parent
766+
if ''.join(tree_string).count(')')-1==''.join(tree_string).count('('):
767+
tree_string.insert(0,'(')
736768

737769
if nexus==True:
738770
return '#NEXUS\nBegin trees;\ntree TREE1 = [&R] %s;\nEnd;'%(''.join(tree_string))
@@ -826,30 +858,33 @@ def make_tree(data,ll,verbose=False):
826858
print '%d adding multitype node %s'%(i,cerberus.group(1))
827859
i+=len(cerberus.group(1))
828860

829-
cerberus=re.match('(\:)*\[&([A-Za-z\_\-{}\,0-9\.\%=\"\+!#]+)\]',data[i:])## look for MCC comments
861+
cerberus=re.match('(\:)*\[(&[A-Za-z\_\-{}\,0-9\.\%=\"\'\+!#]+)\]',data[i:])## look for MCC comments
830862
#cerberus=re.match('\[&[A-Za-z\_\-{}\,0-9\.\%=\"\+]+\]',data[i:])## look for MCC comments
831863
if cerberus is not None:
832864
if verbose==True:
833865
print '%d comment: %s'%(i,cerberus.group(2))
834866
comment=cerberus.group(2)
835-
numerics=re.findall('[A-Za-z\_\.0-9]+=[0-9\-Ee\.]+',comment) ## find all entries that have values as floats
836-
strings=re.findall('[A-Za-z\_\.0-9]+=["|\']*[A-Za-z\_0-9\.\+]+["|\']*',comment) ## strings
837-
treelist=re.findall('[A-Za-z\_\.0-9]+={[A-Za-z\_,{}0-9\.]+}',comment) ## complete history logged robust counting (MCMC trees)
838-
sets=re.findall('[A-Za-z\_\.0-9\%]+={[A-Za-z\.\-0-9eE,\"\_]+}',comment) ## sets and ranges
867+
numerics=re.findall('[,&][A-Za-z\_\.0-9]+=[0-9\-Ee\.]+',comment) ## find all entries that have values as floats
868+
strings=re.findall('[,&][A-Za-z\_\.0-9]+=["|\']*[A-Za-z\_0-9\.\+]+["|\']*',comment) ## strings
869+
treelist=re.findall('[,&][A-Za-z\_\.0-9]+={[A-Za-z\_,{}0-9\.]+}',comment) ## complete history logged robust counting (MCMC trees)
870+
sets=re.findall('[,&][A-Za-z\_\.0-9\%]+={[A-Za-z\.\-0-9eE,\"\_]+}',comment) ## sets and ranges
839871
figtree=re.findall('\![A-Za-z]+=[A-Za-z0-9#]+',comment)
840872

841873
for vals in strings:
842874
tr,val=vals.split('=')
875+
tr=tr[1:]
843876
if '+' in val:
844877
val=val.split('+')[0] ## DO NOT ALLOW EQUIPROBABLE DOUBLE ANNOTATIONS (which are in format "A+B") - just get the first one
845878
ll.cur_node.traits[tr]=val.strip('"')
846879

847880
for vals in numerics: ## assign all parsed annotations to traits of current branch
848881
tr,val=vals.split('=') ## split each value by =, left side is name, right side is value
882+
tr=tr[1:]
849883
ll.cur_node.traits[tr]=float(val)
850884

851885
for val in treelist:
852886
tr,val=val.split('=')
887+
tr=tr[1:]
853888
microcerberus=re.findall('{([0-9]+,[0-9\.\-e]+,[A-Z]+,[A-Z]+)}',val)
854889
ll.cur_node.traits[tr]=[]
855890
for val in microcerberus:
@@ -858,17 +893,19 @@ def make_tree(data,ll,verbose=False):
858893

859894
for vals in sets:
860895
tr,val=vals.split('=')
896+
tr=tr[1:]
861897
if 'set' in tr:
862898
ll.cur_node.traits[tr]=[]
863899
for v in val[1:-1].split(','):
864900
if 'set.prob' in tr:
865901
ll.cur_node.traits[tr].append(float(v))
866902
else:
867903
ll.cur_node.traits[tr].append(v.strip('"'))
868-
elif 'range' in tr or 'HPD' in tr:
869-
ll.cur_node.traits[tr]=map(float,val[1:-1].split(','))
870904
else:
871-
print 'some other trait: %s'%(vals)
905+
try:
906+
ll.cur_node.traits[tr]=map(float,val[1:-1].split(','))
907+
except:
908+
print 'some other trait: %s'%(vals)
872909

873910
if len(figtree)>0:
874911
print 'FigTree comment found, ignoring'
@@ -897,12 +934,46 @@ def make_tree(data,ll,verbose=False):
897934
if data[i] == ';': ## look for string end
898935
break ## end loop
899936

937+
def make_treeJSON(tree,parent,JSONnode,json_translation):
938+
if 'attr' in JSONnode:
939+
attr = JSONnode.pop('attr')
940+
JSONnode.update(attr)
941+
942+
if JSONnode.has_key('children'): ## only nodes have children
943+
new_node=node()
944+
else:
945+
new_node=leaf()
946+
new_node.numName=JSONnode[json_translation['name']] ## set leaf numName
947+
948+
new_node.parent=tree.cur_node ## set parent-child relationships
949+
tree.cur_node.children.append(new_node)
950+
new_node.index=JSONnode[json_translation['name']] ## indexing is based on name
951+
new_node.traits={n:JSONnode[n] for n in JSONnode.keys() if n!='children'} ## set traits to non-children attributes
952+
tree.Objects.append(new_node)
953+
tree.cur_node=new_node
954+
955+
if isinstance(new_node, node):
956+
tree.nodes.append(new_node)
957+
else:
958+
tree.leaves.append(new_node)
959+
960+
if JSONnode.has_key('children'):
961+
for child in JSONnode['children']:
962+
make_treeJSON(tree,JSONnode,child,json_translation)
963+
tree.cur_node=tree.cur_node.parent
964+
965+
900966
def loadNexus(tree_path,tip_regex='\|([0-9]+\-[0-9]+\-[0-9]+)',date_fmt='%Y-%m-%d',treestring_regex='tree [A-Za-z\_]+([0-9]+)',variableDate=True,absoluteTime=True,verbose=False):
901967
tipFlag=False
902968
tips={}
903969
tipNum=0
904970
ll=None
905-
for line in open(tree_path,'r'):
971+
if isinstance(tree_path,str):
972+
handle=open(tree_path,'r')
973+
else:
974+
handle=tree_path
975+
976+
for line in handle:
906977
l=line.strip('\n')
907978

908979
cerberus=re.search('dimensions ntax=([0-9]+);',l.lower())
@@ -956,6 +1027,49 @@ def loadNexus(tree_path,tip_regex='\|([0-9]+\-[0-9]+\-[0-9]+)',date_fmt='%Y-%m-%
9561027

9571028
return ll
9581029

1030+
1031+
def loadJSON(tree_path,json_translation,json_meta=None,verbose=False,sort=True,stats=True):
1032+
assert json_translation.has_key('name') and json_translation.has_key('height'),'JSON translation dictionary missing entries: %s'%(', '.join([entry for entry in ['name','height'] if json_translation.has_key(entry)==False]))
1033+
ll=tree()
1034+
if verbose==True:
1035+
print 'Reading JSON'
1036+
with open(tree_path) as json_data:
1037+
d = json.load(json_data)
1038+
make_treeJSON(ll,'root',d,json_translation)
1039+
1040+
if json_translation.has_key('absoluteTime'):
1041+
if verbose==True:
1042+
print 'Setting absolute time'
1043+
for k in ll.Objects:
1044+
setattr(k,'absoluteTime',k.traits[json_translation['absoluteTime']])
1045+
1046+
if verbose==True:
1047+
print 'Setting heights'
1048+
for k in ll.Objects:
1049+
setattr(k,'height',k.traits[json_translation['height']])
1050+
1051+
if verbose==True:
1052+
print 'Setting lengths'
1053+
for k in ll.Objects:
1054+
k.length=k.height-k.parent.height
1055+
1056+
if verbose==True:
1057+
print 'Traversing and drawing tree'
1058+
1059+
ll.traverse_tree()
1060+
ll.drawTree()
1061+
if stats==True:
1062+
ll.treeStats() ## initial traversal, checks for stats
1063+
if sort==True:
1064+
ll.sortBranches() ## traverses tree, sorts branches, draws tree
1065+
1066+
if json_meta:
1067+
metadata=json.load(open(json_meta['file'],'r'))
1068+
cmap=dict(metadata['color_options'][json_meta['traitName']]['color_map'])
1069+
setattr(ll,'cmap',cmap)
1070+
1071+
return ll
1072+
9591073
if __name__ == '__main__':
9601074
import sys
9611075
ll=tree()

figures/coa_galindia.jpg

97.4 KB
Loading

0 commit comments

Comments
 (0)