''' Examples that demonstrate some of the functionality in the NGF module
'''
from __future__ import division, print_function, absolute_import
from keras.layers import Input, merge, Dense
from keras import models
import utils
from NGF.preprocessing import tensorise_smiles, tensorise_smiles_mp
from NGF.layers import NeuralGraphHidden, NeuralGraphOutput
from NGF.models import build_graph_conv_model
from NGF.sparse import GraphTensor, EpochIterator
# ==============================================================================
# ================================ Load the data ===============================
# ==============================================================================
print("{:=^100}".format(' Data preprocessing '))
data, labels = utils.load_delaney()
# Tensorise data
X_atoms, X_bonds, X_edges = tensorise_smiles_mp(data)
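# For smaller datasets, the single-process tensorise_smiles (imported above)
# is an alternative; a sketch, assuming it returns the same three tensors:
# X_atoms, X_bonds, X_edges = tensorise_smiles(data)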
print('Atoms:', X_atoms.shape)
print('Bonds:', X_bonds.shape)
print('Edges:', X_edges.shape)
# Load sizes from data shape
num_molecules = X_atoms.shape[0]
max_atoms = X_atoms.shape[1]
max_degree = X_bonds.shape[2]
num_atom_features = X_atoms.shape[-1]
num_bond_features = X_bonds.shape[-1]
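# For reference, print the sizes inferred above (illustrative addition):
print('{} molecules, up to {} atoms of degree <= {}'.format(
    num_molecules, max_atoms, max_degree))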
# ==============================================================================
# =============== Example 1: Building a 3-layer graph convnet =================
# ==============================================================================
print("{:=^100}".format(' Example 1 '))
# Parameters
conv_width = 8
fp_length = 62
# Define the input layers
atoms0 = Input(name='atom_inputs', shape=(max_atoms, num_atom_features))
bonds = Input(name='bond_inputs', shape=(max_atoms, max_degree, num_bond_features))
edges = Input(name='edge_inputs', shape=(max_atoms, max_degree), dtype='int32')
# Define the convoluted atom feature layers
atoms1 = NeuralGraphHidden(conv_width, activation='relu', bias=False)([atoms0, bonds, edges])
atoms2 = NeuralGraphHidden(conv_width, activation='relu', bias=False)([atoms1, bonds, edges])
# Define the outputs of each (convoluted) atom feature layer to the fingerprint
fp_out0 = NeuralGraphOutput(fp_length, activation='softmax')([atoms0, bonds, edges])
fp_out1 = NeuralGraphOutput(fp_length, activation='softmax')([atoms1, bonds, edges])
fp_out2 = NeuralGraphOutput(fp_length, activation='softmax')([atoms2, bonds, edges])
# Sum outputs to obtain fingerprint
final_fp = merge([fp_out0, fp_out1, fp_out2], mode='sum')
# Build and compile model for regression.
main_prediction = Dense(1, activation='linear', name='main_prediction')(final_fp)
model = models.Model(input=[atoms0, bonds, edges], output=[main_prediction])
model.compile(optimizer='adagrad', loss='mse')
# Show summary
model.summary()
# Train the model
model.fit([X_atoms, X_bonds, X_edges], labels, nb_epoch=20, batch_size=32, validation_split=0.2)
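# Illustrative addition (not part of the original example): once trained, the
# model predicts with the standard Keras API.
predictions = model.predict([X_atoms, X_bonds, X_edges], batch_size=32)
print('Predictions:', predictions.shape)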
# ==============================================================================
# ============ Example 2: Initialising layers in different ways ===============
# ==============================================================================
print("{:=^100}".format(' Example 2 '))
# Parameters
conv_width = 8
fp_length = 62
# Define the input layers
atoms0 = Input(name='atom_inputs', shape=(max_atoms, num_atom_features))
bonds = Input(name='bond_inputs', shape=(max_atoms, max_degree, num_bond_features))
edges = Input(name='edge_inputs', shape=(max_atoms, max_degree), dtype='int32')
# Define the convoluted atom feature layers
# All methods of initialisation are equivalent!
atoms1 = NeuralGraphHidden(lambda: Dense(conv_width, activation='relu', bias=False))([atoms0, bonds, edges])
atoms2 = NeuralGraphHidden(Dense(conv_width, activation='relu', bias=False))([atoms1, bonds, edges])
# Define the outputs of each (convoluted) atom feature layer to the fingerprint
# All methods of initialisation are equivalent!
fp_out0 = NeuralGraphOutput(Dense(fp_length, activation='softmax'))([atoms0, bonds, edges])
fp_out1 = NeuralGraphOutput(fp_length, activation='softmax')([atoms1, bonds, edges])
fp_out2 = NeuralGraphOutput(lambda: Dense(fp_length, activation='softmax'))([atoms2, bonds, edges])
# Sum outputs to obtain fingerprint
final_fp = merge([fp_out0, fp_out1, fp_out2], mode='sum')
# Build and compile model for regression.
main_prediction = Dense(1, activation='linear', name='main_prediction')(final_fp)
model2 = models.Model(input=[atoms0, bonds, edges], output=[main_prediction])
model2.compile(optimizer='adagrad', loss='mse')
# Show summary
model2.summary()
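# model2 takes the same inputs as model 1, so it could be trained the same way
# (illustrative sketch, commented out to keep this example build-only):
# model2.fit([X_atoms, X_bonds, X_edges], labels, nb_epoch=20, batch_size=32,
#            validation_split=0.2)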
# ==============================================================================
# ================== Example 3: Using the model functions =====================
# ==============================================================================
print("{:=^100}".format(' Example 3 '))
model3 = build_graph_conv_model(max_atoms, max_degree, num_atom_features, num_bond_features,
                                learning_type='regression', conv_layer_sizes=[conv_width, conv_width],
                                fp_layer_size=[fp_length, fp_length, fp_length],
                                conv_activation='relu', fp_activation='softmax',
                                conv_bias=False)
# Show summary
model3.summary()
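# Illustrative sketch (commented out): assuming build_graph_conv_model returns
# a compiled model for the given learning_type, it can be trained directly:
# model3.fit([X_atoms, X_bonds, X_edges], labels, nb_epoch=20, batch_size=32)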
# ==============================================================================
# ===================== Example 4: Using sparse tensors =======================
# ==============================================================================
print("{:=^100}".format(' Example 4 '))
# Using sparse tensors speeds up training considerably, because max_atoms is
# determined by the molecule with the most atoms within a batch, rather than
# by the molecule with the most atoms within the entire dataset.
# Build the same model, but this time use None for max_atoms, to allow this
# dimension to vary per batch.
model4 = build_graph_conv_model(None, max_degree, num_atom_features, num_bond_features,
                                learning_type='regression', conv_layer_sizes=[conv_width, conv_width],
                                fp_layer_size=[fp_length, fp_length, fp_length],
                                conv_activation='relu', fp_activation='softmax',
                                conv_bias=False)
# Show summary
model4.summary()
# Convert the molecule tensors into a GraphTensor; by default, it is sparse
# along the max_atoms dimension.
X_mols = GraphTensor([X_atoms, X_bonds, X_edges])
# Build a generator and train the model
my_generator = EpochIterator((X_mols, labels), batch_size=128)
model4.fit_generator(my_generator, nb_epoch=20, samples_per_epoch=len(labels))
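# Illustrative addition: because max_atoms is None, model4 also accepts the
# regular dense tensors, e.g. for prediction without the generator.
predictions = model4.predict([X_atoms, X_bonds, X_edges], batch_size=128)
print('Predictions:', predictions.shape)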