-
Notifications
You must be signed in to change notification settings - Fork 3
/
graph.py
365 lines (309 loc) · 10.8 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
from collections import defaultdict
import os
from algorithms import Graph as gspanGraph,Vertex,Edge,subgraph_isomorphism
from copy import copy as clone
import time
from multiprocessing import Queue,Value,Process
class Graph:
def __init__(self):
self._graph = defaultdict(set)
self._labels = defaultdict(set)
self.support = None
self.dfs_code = None
self.dfsToNode = dict()
def connect(self, node1, node2):
"""
connect two nodes together
"""
self._graph[node1].add(node2)
self._graph[node2].add(node1)
def deConnect(self, node1, node2):
"""
remove connection
"""
self._graph[node1].remove(node2)
self._graph[node2].remove(node1)
def next(self, node):
"""
Return adjecent nodes
"""
return self._graph[node]
def out(self):
"""
Function to print graph, will only be usefull for smaller graphs
"""
for key in self._graph.keys():
outStr = str(key) + " -- {"
for value in self._graph[key]:
outStr += str(value) + ", "
if len(outStr) > 5:
outStr = outStr[:-2]
outStr += "}\t\t"
for label in self.getLabels(key):
outStr += str(label) + ", "
outStr = outStr[:-2]
print outStr
def toPNG(self, filename,render=False):
"""
Will output graph to a .dot file
If render is set to True it will render the png using graphviz dot
"""
dotname = filename.split('.')[0] + ".dot"
with open(dotname,'w') as outfile:
outfile.write("strict graph g {")
for key in self._graph.keys():
outStr = ''
#set the label (now arbitrarily a label is chosen if more than one is present)
if len(self.getLabels(key)) > 0:
labels = ""
for l in self.getLabels(key):
labels += str(l) + ","
labels = labels[:-1]
outStr += str(key) + " [label=\"" + str(labels) + "\"]\n"
#set the connections if there are
if len(self._graph[key]) == 0:
break
outStr += str(key) + " -- {"
for value in self._graph[key]:
outStr += str(value) + " "
if len(outStr) > 5:
outStr = outStr[:-1]
outStr += "}\n"
outfile.write(outStr)
outfile.write("}")
if render:
os.system("dot -Tpng " + dotname + " > " + filename)
os.system("rm " + dotname)
def setLabel(self, node, label):
"""
Set a single label to a node
"""
self._labels[node].add(label)
def setLabels(self, node, labels):
"""
Set a list/set of labels to a node
"""
for label in labels:
self.setLabel(node, label)
def getLabels(self, node):
"""
Return labels of a node as a set
"""
return self._labels[node]
#when working with only one label in testing phase
def getLabel(self,node):
"""
Returns the top label of a node, not usefull when working with multiple labels
"""
return self._labels[node].copy().pop()
def getNodes(self):
"""
Get list of nodes
"""
nodes = []
for key,value in self._graph.items():
nodes.append(key)
nodes = list(set(nodes))
return nodes
def getEdges(self):
"""
Get list of edges
"""
edges = []
for key,value in self._graph.items():
for v in value:
if v >= key:
edges.append((key,v))
edges = list(set(edges))
return edges
def removeNode(self,node):
"""
Remove a node and its connections from the graph
"""
for neighbour in self.next(node).copy():
self.deConnect(node,neighbour)
self._graph.pop(node)
def contains(self, node):
"""
True if node in graph
"""
S = set()
for key,values in self._graph.items():
S = S.union(key)
S = S.union(values)
return node in S
# depth first search to see if nodes are connected
def areConnected(self, node1, node2):
"""
True if two nodes are connected in some path
"""
self.visited = set()
self.path = list()
connected = self.connected(node1, node2)
# self.path = self.path.reverse()
return connected
def connected(self, node1, node2):
"""
Recursive call, not to be called directly
"""
if node1 == node2:
return True
neighbours = self.next(node1)
self.visited.add(node1)
if node2 in neighbours:
self.path.append(node2)
return True
for node in neighbours:
if not(node in self.visited):
if self.connected(node, node2):
self.path.append(node)
return True
return False
def shortestPath(self, node1, node2):
"""
Implementation of dijkstra's algorithm
Returns the length of the shortes path as well as the Shortest path
"""
if not(self.areConnected(node1, node2)):
return -1, []
values = dict()
for key in self._graph.keys():
values[key] = float("inf")
values[node1] = 0
visited = set()
visited.add(node1)
current = node1
while not(node2 in visited):
l = values[current]
for neighbour in self.next(current):
if not(neighbour in visited):
newlength = l + 1
if newlength < values[neighbour]:
values[neighbour] = newlength
visited.add(current)
lowest = ('', float("inf"))
for key in values.keys():
if not(key in visited) and (values[key] < lowest[1]):
lowest = (key, values[key])
current = lowest[0]
#backtrack
length = values[node2]
path = [node2]
previous = node2
for i in range(length-1,-1,-1):
newnode = [node for node in values.keys() if values[node] == i and node in self.next(previous)][0]
path.append(newnode)
previous = newnode
return length, list(reversed(path))
def maxpathfrom(self,a,nodes,q):
diameter = -1
for b in range(a+1,len(nodes)):
if a != b:
pathlength, _ = self.shortestPath(nodes[a],nodes[b])
if pathlength > diameter:
diameter = pathlength
q.put(diameter)
def diameter(self,cores=1):
"""
Returns the diameter of the Graph, i.e. max(shortestPath(a,b) for all a and b)
Set number of cores to parallellise the process as it is an intensive operation
"""
nodes = self.getNodes()
allnodes = len(nodes)
it = 0
q = Queue()
a = 0
while a < len(nodes):
pool = []
for i in range(0,cores):
if (a+i) < len(nodes):
p = Process(target=self.maxpathfrom,args=(a+i,nodes,q))
p.start()
pool.append(p)
for p in pool:
p.join()
a += cores
get = True
diameters = []
while get:
try:
top = q.get(block=False)
diameters.append(top)
except:
get = False
return max(diameters)
def EVCentrality(self,maxit=100):
"""
returns a dict of all Eigen Vector Centrality values
"""
nodes = self.getNodes()
V = defaultdict(lambda: 1)
for i in range(0,maxit):
W = defaultdict(lambda: 0)
for node in nodes:
for neighbour in self.next(node):
W[neighbour] = W[neighbour] + V[node]
V = clone(W)
S = 0
for _,i in V.items():
S += i
for key,value in V.items():
V[key] = float(value)/float(S)
return V
emptylabelnumber = 0
def clusteringCoefficient(self,node):
"""
Returns local clustering coefficient of a node
"""
neighbours = self.next(node)
k = len(neighbours)
if k < 2:
return 0
closedNeighbours = 0
for j in neighbours:
for n in neighbours:
if j != n:
if n in self.next(j):
closedNeighbours += 1
#each edge would be counted twice so drop the two
return float(closedNeighbours)/ float(k * (k-1))
def globalClusteringCoefficient(self):
"""
Returns global clustering coefficient (avg local CC) of a graph
"""
nodes = self.getNodes()
C = 0
for i in nodes:
Ci = self.clusteringCoefficient(i)
# print Ci
C += Ci
return C/len(nodes)
def convert(self,id,maxlabels=99999,uniqueEmptylabels=True):
"""
Convert this graph to the format needed for the gSpan algorithm
"""
newGraph = gspanGraph(id=id)
for key,values in self._graph.items():
l = self.getLabels(key).copy().pop();
if uniqueEmptylabels:
#make sure an empty label will get assigned a unique label so that no two empty labels are the same
if l == '':
l = "EMPTYLABEL" + str(Graph.emptylabelnumber)
Graph.emptylabelnumber += 1
v = Vertex(id=int(key.encode("hex"),16), label=l)
newGraph.add_vertex(vertex=v)
if "EMPTYLABEL" in l:
newGraph.labeldict[v.id] = set([l])
else:
newGraph.labeldict[v.id] = set(list(self.getLabels(key))[:maxlabels])
for key,values in self._graph.items():
for value in values:
e = Edge(label='_',from_vertex=newGraph.get_vertex(id=int(key.encode("hex"),16)), to_vertex=newGraph.get_vertex(id=int(value.encode("hex"),16)))
newGraph.add_edge(edge = e)
return newGraph
def subgraphIsomorphism(self,subgraph):
"""
True if there is a subgraph isomorphism of the subgraph in this graph
subgraph must be a gSpan DFS code
"""
return subgraph_isomorphism(subgraph,self.convert(0,uniqueEmptylabels=False))