|
| 1 | +import networkx as nx |
| 2 | +from networkx.algorithms import approximation |
| 3 | +import random |
| 4 | +import sys |
| 5 | +import time |
| 6 | +import os |
| 7 | +import argparse |
| 8 | +import numpy as np |
| 9 | +import scipy.io |
| 10 | +import logging |
| 11 | +from scipy.sparse import csr_matrix |
| 12 | +sys.path.append(os.path.join(os.environ['PHD_ROOT'], 'imin', 'src')) |
| 13 | +sys.path.append(os.path.join(os.environ['PHD_ROOT'], 'imin', 'scripts')) |
| 14 | +import helpers |
| 15 | +from FileManager import * |
| 16 | + |
class Generator:
    """Build directed, weighted networkx graphs described by a parameter dict.

    ``params`` must contain ``graph_type`` (one of the keys of
    ``self.generators``) together with whatever entries that graph type reads
    (``n``, ``m``, ``p``, ``k``, ...).  For every graph type except ``'vk'``,
    ``generate`` also reads ``both_directions``, ``weight_scale`` and
    ``random_weight``.
    """

    # Graph types that are constructed as ``nx.DiGraph`` by their loader, so
    # ``generate`` does not assign random edge directions to them.
    _PRE_DIRECTED_TYPES = ("gnutella", "stanford")

    def __init__(self, params):
        """Store ``params`` and register one zero-argument factory per graph type.

        The factories are lazy: nothing is generated or read from disk until
        ``generate`` calls the one selected by ``params["graph_type"]``.
        """
        self.params = params
        self.generators = {
            'powerlaw_cluster': lambda: nx.powerlaw_cluster_graph(
                params["n"], params["m"], params["p"]),
            'stanford': self.get_stanford_graph,
            'gnutella': self.get_gnutella_graph,
            # grid_2d_graph labels nodes with (row, col) tuples; relabel to ints.
            'grid': lambda: nx.convert_node_labels_to_integers(
                nx.grid_2d_graph(params['n'], params['n'])),
            'path': lambda: nx.path_graph(params["n"]),
            'binomial': lambda: nx.fast_gnp_random_graph(params['n'], params['p']),
            'watts_strogatz': lambda: nx.watts_strogatz_graph(
                params['n'], params['k'], params['p']),
            'karate': lambda: nx.karate_club_graph(),
            'vk': self.get_vk_graph,
            'gaussian_random_partition': lambda: nx.gaussian_random_partition_graph(
                params['n'], params['s'], params['v'],
                params['p_in'], params['p_out']),
        }

    def gen_graph_id(self):
        """Return a new identifier string for a generated graph.

        The identifier is ``str(helpers.get_static_hash(...))`` of the current
        Unix time in seconds, a random five-digit number and every parameter
        value joined with ``"_"``.
        """
        raw = (
            str(int(time.time()))
            + str(random.randint(10000, 99999))
            + "_".join(str(value) for value in self.params.values())
        )
        return str(helpers.get_static_hash(raw))

    def generate(self, number_of_graphs=1):
        """Yield ``number_of_graphs`` graphs of type ``params["graph_type"]``.

        Every graph type except ``'vk'`` gets edge weights via
        ``assign_weights``; types that are not loaded as directed graphs are
        first given directions via ``add_random_directions``.  Each yielded
        graph carries a ``graph_id`` plus a copy of all parameters in
        ``G.graph``.

        Raises:
            KeyError: if ``graph_type`` (or a parameter it needs) is missing.
            NotImplementedError: if ``both_directions`` is requested for a
                graph type that is loaded already directed.
        """
        graph_type = self.params["graph_type"]
        for _ in range(number_of_graphs):
            G = self.generators[graph_type]()
            if graph_type != 'vk':
                if graph_type in self._PRE_DIRECTED_TYPES:
                    if self.params["both_directions"]:
                        raise NotImplementedError("Not implemeted")
                else:
                    G = self.add_random_directions(G, self.params["both_directions"])
                G = self.assign_weights(
                    G, self.params["weight_scale"], self.params["random_weight"])
            G.graph['graph_id'] = self.gen_graph_id()
            # Applied after graph_id, so a 'graph_id' entry in params wins.
            G.graph.update(self.params)
            yield G

    @staticmethod  # used in tests
    def assign_weights(G, weight_scale, random_weight):
        """Set the ``'weight'`` attribute of every edge of ``G`` in place.

        Args:
            G: graph whose edges are weighted.
            weight_scale: the weight itself when ``random_weight`` is false,
                otherwise the factor applied to a ``np.random.random()`` draw.
            random_weight: whether to draw a separate random weight per edge.

        Returns:
            The same graph object ``G``.
        """
        for u, v in G.edges():
            if random_weight:
                # Exactly one RNG draw per edge.
                G[u][v]['weight'] = np.random.random() * weight_scale
            else:
                G[u][v]['weight'] = weight_scale
        return G

    @staticmethod
    def add_random_directions(G, both=False):
        """Return a new ``nx.DiGraph`` built from the undirected graph ``G``.

        With ``both`` true every edge ``{u, v}`` becomes the two arcs
        ``u -> v`` and ``v -> u``; otherwise one of the two orientations is
        picked with probability 0.5 each.  Edge attributes are copied onto
        every arc created.  All nodes of ``G`` are kept, including isolated
        ones; node and graph-level attributes are not copied.
        """
        assert not nx.is_directed(G)
        dG = nx.DiGraph()
        # Add the nodes explicitly so nodes without any edge are not lost.
        dG.add_nodes_from(G)
        for u, v in G.edges():
            attrs = G[u][v]
            if both:
                arcs = [(u, v), (v, u)]
            elif np.random.random() < 0.5:
                arcs = [(u, v)]
            else:
                arcs = [(v, u)]
            for src, dst in arcs:
                dG.add_edge(src, dst)
                dG[src][dst].update(attrs)
        return dG

    def get_stanford_graph(self):
        """Load ``wb-cs-stanford.mat`` from ``$ALLDATA_PATH/imin`` as a DiGraph.

        The matrix stored at ``mat['Problem'][0][0][2]`` is used as the
        adjacency matrix (presumably the layout of this particular .mat file;
        verify if the data set is replaced).
        """
        mat = scipy.io.loadmat(
            os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'wb-cs-stanford.mat'))
        adjacency = csr_matrix(mat['Problem'][0][0][2])
        # from_numpy_array is what from_numpy_matrix delegated to in
        # networkx 2.x, and it is still available in networkx 3.
        # The whole graph is returned; restricting it to a large connected
        # component of the undirected projection was tried and abandoned.
        return nx.from_numpy_array(adjacency.toarray(), create_using=nx.DiGraph())

    def get_gnutella_graph(self):
        """Load ``p2p-Gnutella31.txt`` from ``$ALLDATA_PATH/imin`` as a DiGraph.

        The first line of the file holds the node count and the edge count;
        every following non-blank line holds one ``source target`` pair.
        Nodes ``0 .. nodes-1`` are always present, even without edges.
        """
        path = os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'p2p-Gnutella31.txt')
        edges = []
        with open(path) as f:
            nodes, edge_count = (int(field) for field in f.readline().split())
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines, e.g. a trailing newline at EOF.
                    continue
                edges.append((int(fields[0]), int(fields[1])))
        assert len(edges) == edge_count
        G = nx.DiGraph()
        G.add_nodes_from(range(nodes))
        G.add_edges_from(edges)
        return G

    def get_vk_graph(self):
        """Load the pickled graph ``vk_graph_cleaned.pkl`` from ``$ALLDATA_PATH/imin``."""
        # NOTE(review): this unpickles the file, so it must come from a
        # trusted source -- unpickling can execute arbitrary code.
        G = nx.read_gpickle(
            os.path.join(os.environ['ALLDATA_PATH'], 'imin', 'vk_graph_cleaned.pkl'))
        return G

    @staticmethod
    def _degree_summary(degrees):
        """Return min/max/mean/std/median of ``degrees`` keyed by graph attribute name."""
        return {
            'min_degree': min(degrees),
            'max_degree': max(degrees),
            'avg_degree': np.mean(degrees),
            'std_degree': np.std(degrees),
            'median_degree': np.median(degrees),
        }

    @staticmethod
    def analyze_graph(G):
        """Compute summary statistics of ``G`` and store them in ``G.graph``.

        Writes ``directed``, ``connected_components``, ``largest_component``
        (a node count), ``average_clustering`` (the approximation from
        ``networkx.algorithms.approximation``) and the five ``*_degree``
        statistics.  ``G.graph['graph_id']`` must already be set because it is
        used in the progress log messages.
        """
        G.graph['directed'] = nx.is_directed(G)
        # Component and clustering statistics are taken on the undirected
        # version of the graph, for directed and undirected inputs alike.
        G_und = G.to_undirected()
        G.graph['connected_components'] = nx.number_connected_components(G_und)
        G.graph['largest_component'] = len(max(nx.connected_components(G_und), key=len))

        logging.info("Graph ID {}: components analyzed.".format(G.graph['graph_id']))
        G.graph['average_clustering'] = approximation.average_clustering(G_und)
        logging.info("Graph ID {}: clustering analyzed.".format(G.graph['graph_id']))
        # Degrees come from G itself, not from the undirected version.
        degrees = [degree for _, degree in G.degree()]
        G.graph.update(Generator._degree_summary(degrees))
        logging.info("Graph ID {}: degrees analyzed.".format(G.graph['graph_id']))
0 commit comments