Skip to content

Commit 623ff12

Browse files
author
Alvis Logins
committed
Initial commit
0 parents  commit 623ff12

11 files changed

+859
-0
lines changed

DegreeSolver.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import networkx as nx
2+
import time
3+
from Solver import *
4+
5+
class DegreeSolver(Solver):
    """Baseline immunization: block the k highest-degree non-seed nodes.

    Expects the Solver base class to provide:
      self.G     -- the (networkx) propagation graph
      self.seeds -- iterable of seed (infected) nodes, excluded from blocking
      self.k     -- number of nodes to block
      self.log   -- dict collecting run statistics
    """

    def run(self):
        """Pick the k non-seed nodes with the largest degree.

        Records 'Total time' (seconds) and 'Blocked nodes' (list of int)
        in self.log.
        """
        t1 = time.time()
        # Rank every candidate (non-seed) node by its degree.
        # (Original used self.G.degree([node])[node] -- a one-node dict
        # lookup; self.G.degree(node) returns the same value directly.)
        degrees = [(node, self.G.degree(node))
                   for node in self.G.nodes() if node not in self.seeds]
        # Highest degree first; the top-k form the blocked set.
        # Equivalent to the original ascending sort + k pops from the end.
        degrees.sort(key=lambda t: t[1], reverse=True)
        blocked = [node for node, _ in degrees[:self.k]]
        t2 = time.time()

        self.log['Total time'] = (t2 - t1)
        self.log['Blocked nodes'] = [int(node) for node in blocked]

DomSolver.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
'''
2+
The class implements DAVA - the seed-aware immunization algorithm based on dominator trees.
3+
'''
4+
5+
import networkx as nx
6+
import time
7+
from collections import defaultdict
8+
import sys
9+
import os
10+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
11+
import Solver as slv
12+
from functools import reduce
13+
import math
14+
import numpy as np
15+
16+
class DomSolver(slv.Solver):
    """DAVA: seed-aware immunization based on dominator trees.

    All seeds are merged into a single "superseed" node; the dominator
    tree of the graph rooted at the superseed is built, and the k nodes
    whose dominated subtrees save the largest expected number of nodes
    are blocked.  Relies on the Solver base class for self.G, self.seeds,
    self.k, self.params and self.log.
    """

    def clear(self):
        # Normalize edge probabilities and collapse the seeds before a run.
        for e in self.G.edges(data=True):
            if e[2]['weight'] == 1:
                e[2]['weight'] = 0.99999 # No p=1 allowed due to probability calculation along shortest path
        self.create_superseed_and_update_weights()

    def create_superseed_and_update_weights(self):
        """Merge all seeds into one superseed node and switch edge weights
        to -log(p) so shortest paths correspond to most-probable paths."""
        # Choose a node id not already present in the graph.
        self.superseed_index = len(self.G)
        while self.superseed_index in self.G:
            self.superseed_index += 1
        # Collect, per neighbor of any seed, the probabilities of the edges
        # from every seed, so parallel seed->n edges merge into one.
        neighbors = defaultdict(lambda: [])
        for seed in self.seeds:
            for n in self.G.neighbors(seed):
                neighbors[n].append(self.G[seed][n]['weight'])
        new_edges = [(self.superseed_index, n, DomSolver.get_total_weight(neighbors[n])) for n in neighbors]
        self.G.add_weighted_edges_from(new_edges)
        # Keep everything except the original seeds, plus the superseed.
        self.G = self.G.subgraph((set(self.G.nodes()) - set(self.seeds)) | set([self.superseed_index])).copy()
        # -log transform: Dijkstra on these weights maximizes the product of
        # edge probabilities along a path.
        for edge in self.G.edges():
            self.G[edge[0]][edge[1]]['weight'] = -math.log(self.G[edge[0]][edge[1]]['weight'])

    @staticmethod
    def get_total_weight(list_of_probabilities):
        # Probability that at least one of several independent edges
        # activates: 1 - prod(1 - p_i).
        return 1. - reduce(lambda x, y: x*y, [(1.-p) for p in list_of_probabilities])

    def run(self):
        """Block k nodes; greedy mode rebuilds the dominator tree after each
        pick, 'fast' mode builds it once and takes the top-k in one shot.

        Records 'Total time' (excluding stats-logging overhead) and
        'Blocked nodes' in self.log.
        """
        t1 = time.time()
        blocked = []
        extra_time = 0  # time spent on stats logging, excluded from total

        if not self.params.get("fast", False):
            for iteration in range(self.k):
                self.build_domtree()
                if iteration == 0:
                    extra_time += self.save_tree_stats_return_time("first it")
                if iteration == self.k - 1:
                    extra_time += self.save_tree_stats_return_time("last it")
                blocked += self.get_best_nodes(1)
                # Removing the blocked node changes dominance relations,
                # hence the rebuild on the next iteration.
                self.G.remove_node(blocked[-1])
        else:
            self.build_domtree()
            extra_time += self.save_tree_stats_return_time("first it")
            blocked = self.get_best_nodes(self.k)
        t2 = time.time()
        self.log['Total time'] = t2 - t1 - extra_time
        self.log['Blocked nodes'] = blocked

    def save_tree_stats_return_time(self, prefix):
        """Log shape statistics of the current dominator tree under keys
        suffixed with *prefix*; return the seconds spent doing so."""
        t1 = time.time()
        g = self.domtree
        sp = nx.single_source_shortest_path_length(g,self.superseed_index)
        self.log['tree depth ' + prefix] = max([sp[n] for n in sp])
        self.log['first level node fraction ' + prefix] = g.degree(self.superseed_index)/len(self.G)
        first_level_degrees = [g.out_degree(n) for n in g.neighbors(self.superseed_index)]
        self.log['second level node fraction ' + prefix] = sum(first_level_degrees)/len(self.G)
        self.log['second level avg degree ' + prefix] = 0 if len(first_level_degrees) == 0 else np.mean(first_level_degrees)
        t2 = time.time()
        return t2 - t1

    def build_domtree(self):
        """Build self.domtree: the dominator tree of self.G rooted at the
        superseed, with each edge weighted by the conditional probability
        of reaching the child given the parent (along most-probable paths)."""
        tree_dict = nx.algorithms.dominance.immediate_dominators(self.G, self.superseed_index)
        self.domtree = nx.DiGraph()
        self.domtree.add_node(self.superseed_index)
        # immediate_dominators maps node -> idom; reverse to parent -> child
        # edges, skipping the root's self-mapping.
        self.domtree.add_edges_from([(edge[1],edge[0]) for edge in tree_dict.items() if edge[0] != edge[1]])
        # Weights are -log(p), so Dijkstra lengths are -log of path probability.
        probabilities_from_root = nx.single_source_dijkstra_path_length(self.G, self.superseed_index)

        #probability (v,u) = p(u)/p(v) from root
        for edge in self.domtree.edges():
            if edge[0] == self.superseed_index:
                probability = math.exp(-probabilities_from_root[edge[1]])
            else:
                probability = math.exp(-probabilities_from_root[edge[1]]+probabilities_from_root[edge[0]])
            self.domtree[edge[0]][edge[1]]['weight'] = probability

    def traverseTreeRec(self, node):
        """Expected number of nodes saved by blocking *node*: itself plus the
        probability-weighted benefit of every dominated descendant."""
        benefit = 1
        for n in self.domtree.neighbors(node):
            benefit += self.traverseTreeRec(n)*self.domtree[node][n]['weight']
        return benefit

    def get_rank(self):
        """Return [(benefit, node)] for the superseed's children.

        Only first-level children are candidates: any deeper node is
        dominated by one of them.  If k exceeds the number of candidates the
        problem is trivial and a single (arbitrary) node is returned.
        """
        rank = []
        if self.k > self.domtree.degree(self.superseed_index):
            self.log['error'] = "Problem is trivial"
            if self.domtree.degree(self.superseed_index) == 0:
                # Tree has no children at all: fall back to a random
                # non-seed, non-superseed node of the graph.
                return [(0,np.random.choice([n for n in self.G.nodes() if n != self.superseed_index and n not in self.seeds], replace=False))]
            return [(0, next(self.domtree.neighbors(self.superseed_index)))]
        for n in self.domtree.neighbors(self.superseed_index):
            benefit = self.traverseTreeRec(n)*self.domtree[self.superseed_index][n]['weight']
            rank.append((benefit, n))
        return rank

    def get_best_nodes(self, number_of_nodes):
        """Return the ids (as int) of the number_of_nodes highest-benefit
        candidates from get_rank()."""
        rank = self.get_rank()
        return [int(a[1]) for a in sorted(rank)[-number_of_nodes:]]

Generator.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import networkx as nx
2+
from networkx.algorithms import approximation
3+
import random
4+
import sys
5+
import time
6+
import os
7+
import argparse
8+
import numpy as np
9+
import scipy.io
10+
import logging
11+
from scipy.sparse import csr_matrix
12+
sys.path.append(os.path.join(os.environ['PHD_ROOT'], 'imin', 'src'))
13+
sys.path.append(os.path.join(os.environ['PHD_ROOT'], 'imin', 'scripts'))
14+
import helpers
15+
from FileManager import *
16+
17+
class Generator:
    """Factory for the benchmark graphs used in the experiments.

    *params* is a dict holding at least 'graph_type' plus the
    generator-specific parameters (n, m, p, k, s, v, p_in, p_out, ...),
    'both_directions', 'weight_scale' and 'random_weight'.
    """

    def __init__(self, params):
        self.params = params
        # Map graph_type -> zero-argument factory producing a fresh graph.
        self.generators = {
            'powerlaw_cluster': lambda: nx.powerlaw_cluster_graph(params["n"], params["m"], params["p"]),
            'stanford': lambda: self.get_stanford_graph(),
            'gnutella': lambda: self.get_gnutella_graph(),
            'grid': lambda: nx.convert_node_labels_to_integers(nx.grid_2d_graph(params['n'], params['n'])),
            'path': lambda: nx.path_graph(params["n"]),
            'binomial': lambda: nx.fast_gnp_random_graph(params['n'], params['p']),
            'watts_strogatz': lambda: nx.watts_strogatz_graph(params['n'], params['k'], params['p']),
            'karate': lambda: nx.karate_club_graph(),
            'vk': lambda: self.get_vk_graph(),
            'gaussian_random_partition': lambda: nx.gaussian_random_partition_graph(params['n'], params['s'], params['v'], params['p_in'], params['p_out'])
        }

    def gen_graph_id(self):
        """Return a (practically unique) hash-based id built from the current
        time, a random salt and the parameter values."""
        return str(helpers.get_static_hash(str(int(time.time())) + str(random.randint(10000, 99999)) + "_".join([str(self.params[p]) for p in self.params])))

    def generate(self, number_of_graphs=1):
        """Yield *number_of_graphs* graphs of the configured type, weighted
        and (for synthetic undirected graphs) randomly directed, with the
        generation parameters and a fresh graph_id stored in G.graph."""
        for i in range(number_of_graphs):
            G = self.generators[self.params["graph_type"]]()
            if self.params["graph_type"] != 'vk':
                if self.params["graph_type"] not in ["gnutella", "stanford"]:
                    # Synthetic graphs come undirected; orient their edges.
                    G = self.add_random_directions(G, self.params["both_directions"])
                else:
                    # Real-world datasets are already directed.
                    if self.params["both_directions"]:
                        raise Exception("Not implemented")
                # NOTE(review): 'vk' graph is loaded from a cleaned pickle and
                # presumably comes pre-weighted -- confirm against the data file.
                G = self.assign_weights(G, self.params["weight_scale"], self.params["random_weight"])
            G.graph['graph_id'] = self.gen_graph_id()
            G.graph.update(self.params)
            yield G

    @staticmethod # used in tests
    def assign_weights(G, weight_scale, random_weight):
        """Set every edge's 'weight': uniform in [0, weight_scale) when
        *random_weight* is true, else the constant *weight_scale*. Returns G."""
        if random_weight:
            for e in G.edges():
                # One draw per edge (the original drew a second, unused value).
                G[e[0]][e[1]]['weight'] = np.random.random()*weight_scale
        else:
            for e in G.edges():
                G[e[0]][e[1]]['weight'] = weight_scale
        return G

    @staticmethod
    def add_random_directions(G, both=False):
        """Return a DiGraph copy of undirected *G*: each edge kept in both
        directions when *both*, otherwise in one uniformly random direction.
        Edge attributes are copied over."""
        assert(not nx.is_directed(G))
        dG = nx.DiGraph()
        for e in G.edges():
            if both:
                dG.add_edge(e[0],e[1])
                dG.add_edge(e[1],e[0])
                for key in G[e[0]][e[1]]:
                    dG[e[0]][e[1]][key] = G[e[0]][e[1]][key]
                    dG[e[1]][e[0]][key] = G[e[0]][e[1]][key]
            else:
                if np.random.random() < 0.5:
                    dG.add_edge(e[0],e[1])
                    for key in G[e[0]][e[1]]:
                        dG[e[0]][e[1]][key] = G[e[0]][e[1]][key]
                else:
                    dG.add_edge(e[1],e[0])
                    for key in G[e[1]][e[0]]:
                        dG[e[1]][e[0]][key] = G[e[0]][e[1]][key]
        return dG

    def get_stanford_graph(self):
        """Load the wb-cs-stanford web graph from the MATLAB file under
        $ALLDATA_PATH as a directed graph."""
        mat = scipy.io.loadmat(os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'wb-cs-stanford.mat'))
        sparse = mat['Problem'][0][0][2]
        m = csr_matrix(sparse)
        g = nx.DiGraph()
        G = nx.from_numpy_matrix(m.toarray(), create_using=g)
        return G

    def get_gnutella_graph(self):
        """Load the p2p-Gnutella31 edge list (header: '<nodes> <edges>')
        from $ALLDATA_PATH as a directed graph."""
        edges = []
        with open(os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'p2p-Gnutella31.txt')) as f:
            nodes, edge_count = f.readline().split()
            nodes = int(nodes)
            edge_count = int(edge_count)
            for line in f:
                edges.append((int(line.split()[0]), int(line.split()[1])))
            # Sanity check: the file header must match the edge count read.
            assert(len(edges) == edge_count)
        G = nx.DiGraph()
        G.add_nodes_from(range(nodes))
        G.add_edges_from(edges)
        return G

    def get_vk_graph(self):
        """Load the pre-cleaned VK social graph pickle from $ALLDATA_PATH."""
        G = nx.read_gpickle(os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'vk_graph_cleaned.pkl'))
        return G

    @staticmethod
    def analyze_graph(G):
        """Attach summary statistics (components, clustering, degree stats)
        to G.graph; progress is logged per phase."""
        G.graph['directed'] = nx.is_directed(G)
        G_und = G.to_undirected()
        G.graph['connected_components'] = nx.number_connected_components(G_und)
        G.graph['largest_component'] = len(max(nx.connected_components(G_und), key=len))

        logging.info("Graph ID {}: components analyzed.".format(G.graph['graph_id']))
        G.graph['average_clustering'] = approximation.average_clustering(G_und)
        logging.info("Graph ID {}: clustering analyzed.".format(G.graph['graph_id']))
        degrees = [d for n, d in G.degree()]
        # BUG FIX: 'min_degree' was assigned the whole
        # (min, max, mean, median) tuple; it now stores only the minimum.
        G.graph['min_degree'] = min(degrees)
        G.graph['max_degree'] = max(degrees)
        G.graph['avg_degree'] = np.mean(degrees)
        G.graph['std_degree'] = np.std(degrees)
        G.graph['median_degree'] = np.median(degrees)
        logging.info("Graph ID {}: degrees analyzed.".format(G.graph['graph_id']))

0 commit comments

Comments
 (0)