-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample_autoencoder_keras.py
233 lines (192 loc) · 9.12 KB
/
example_autoencoder_keras.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import numpy as np
import argparse
import re
from datetime import datetime, timedelta
from sklearn import datasets, linear_model
from sklearn.preprocessing import label_binarize
from sklearn.metrics import mean_squared_error as mse
from hyperstream import HyperStream, TimeInterval
from hyperstream.utils import UTC
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasClassifier
import matplotlib.pyplot as plt
from utils import generate_hidden_images
class MyKerasUnsupervised(object):
    """Scikit-learn-style wrapper around a Keras autoencoder.

    This is a modification of the KerasUnsupervised in order to keep the
    labels with the original values.
    Implementation of the scikit-learn classifier API for Keras.
    """
    def __init__(self, architecture='auto2s', lr=0.1):
        # The Keras model is built lazily on the first call to `fit`,
        # once the input dimensionality is known from the data.
        self.model = None
        self.architecture = architecture
        self.lr = lr

    def fit(self, x, classes=None, **kwargs):
        """Constructs a new model with `build_fn` & fit the model to `(x, y)`.

        # Arguments
            x : array-like, shape `(n_samples, n_features)`
                Training samples where n_samples in the number of samples
                and n_features is the number of features.
            classes : unused; kept for scikit-learn API compatibility.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `Sequential.fit`

        # Returns
            history : object
                details about the training history at each epoch.
        """
        if self.model is None:
            self.model = self.create_model(input_dim=x.shape[1],
                                           architecture=self.architecture,
                                           lr=self.lr)
        # Autoencoder: targets are the inputs themselves. Each call runs a
        # single epoch over the given mini-batch (batch_size = full batch).
        return self.model.fit(x, x, batch_size=x.shape[0], epochs=1, verbose=0)

    def create_model(self, input_dim=1, optimizer='rmsprop',
                     init='glorot_uniform', lr=1, momentum=0.0, decay=0.0,
                     nesterov=False, architecture='lr'):
        """Build and compile the encoder/decoder pair from a spec string.

        Parameters
        ----------
        architecture: string: lr, mlp100, mlp100d, mlp100d100d
            Must start with 'auto'; digits add Dense layers, letters add
            activations/regularizers (s=sigmoid, r=relu, m=softmax,
            d=dropout, n=batchnorm, l=linear/no-op, _=switch to decoder).
        """
        previous_layer = input_dim
        # Split e.g. 'auto20s10s' into ['auto', '20', 's', '10', 's', ''].
        aux_architecture = re.split(r'(\d+)', architecture)
        # NOTE: self.architecture is replaced here by the tokenized (list)
        # form of the specification string.
        self.architecture = [aux_architecture[0]]
        for s in aux_architecture[1:]:
            if s.isdigit():
                self.architecture.append(s)
            else:
                for ss in s:
                    self.architecture.append(ss)
        if self.architecture[0] == 'auto':
            encoder = Sequential()
            decoder = Sequential()
            actual = encoder
            for i in range(1, len(self.architecture)):
                if self.architecture[i] == 'd':
                    actual.add(Dropout(0.5))
                elif self.architecture[i] == 's':
                    actual.add(Activation('sigmoid'))
                elif self.architecture[i] == 'r':
                    actual.add(Activation('relu'))
                elif self.architecture[i] == 'm':
                    # FIX: the original referenced the undefined name
                    # `input_size` here (NameError at runtime); `input_dim`
                    # is the only in-scope size it can refer to.
                    actual.add(Dense(input_dim, kernel_initializer=init))
                    actual.add(Activation('softmax'))
                elif self.architecture[i] == 'n':
                    actual.add(BatchNormalization())
                elif self.architecture[i].isdigit():
                    actual_layer = int(self.architecture[i])
                    actual.add(Dense(actual_layer, input_dim=previous_layer,
                                     kernel_initializer=init))
                    previous_layer = actual_layer
                elif self.architecture[i] == 'l':
                    # Linear activation: nothing to add.
                    continue
                elif self.architecture[i] == '_':
                    # Subsequent layers belong to the decoder half.
                    actual = decoder
                else:
                    # FIX: was `raise(ValueError, msg)`, which raises a
                    # TypeError in Python 3 instead of a ValueError.
                    raise ValueError('Architecture with a wrong specification')
            # Final decoder layer maps back to the input dimensionality.
            decoder.add(Dense(input_dim, input_dim=previous_layer,
                              kernel_initializer=init))
        else:
            raise ValueError('Architecture with a wrong specification')
        model = Sequential()
        model.add(encoder)
        model.add(decoder)
        self.encoder = encoder
        self.decoder = decoder
        print(encoder.summary())
        print(decoder.summary())
        print(model.summary())
        if optimizer == 'sgd':
            optimizer = SGD(lr=lr, momentum=momentum, decay=decay,
                            nesterov=nesterov)
        loss = 'mean_squared_error'
        model.compile(loss=loss, optimizer=optimizer)
        return model

    def predict(self, x):
        """Reconstruct `x` through the full autoencoder."""
        return self.model.predict(x, verbose=0)

    def transform(self, x):
        """Map `x` to its hidden (encoded) representation."""
        return self.encoder.predict(x, verbose=0)

    def inverse_transform(self, h):
        """Map a hidden representation `h` back to input space."""
        return self.decoder.predict(h, verbose=0)

    def score(self, x, pred):
        """Return the mean squared reconstruction error."""
        return mse(x, pred)
def get_arguments(args=None):
    """Parse command-line options for the autoencoder example.

    Parameters
    ----------
    args : list of str, optional
        Argument strings to parse instead of ``sys.argv[1:]``.
        Passing an explicit list makes the parser testable; the default
        (None) preserves the original command-line behaviour.

    Returns
    -------
    argparse.Namespace
        Parsed arguments: architecture, dataset, epochs, seed,
        batchsize, learning_rate.
    """
    # FIX: the description was the copy-pasted argparse-docs example
    # 'Process some integers.', which does not describe this program.
    parser = argparse.ArgumentParser(
        description='Train a Keras autoencoder on a scikit-learn dataset.')
    parser.add_argument('-a', '--architecture', type=str,
                        default='auto2l',
                        help='''Autoencoder architecture in the following form:
                        start always with auto, then add hidden layers
                        specifying the number of units and the activation
                        functions with a letter. The available letters are s
                        for sigmoid, l for linear, m for softmax, d for
                        dropout with fixed value of 0.5. Eg. 'auto20s10s2s'
                        will generate an autoencoder with 20 sigmoid units, 10
                        sigmoid units 2 sigmoid units, 2 sigmoid units, 10
                        sigmoid units 20 sigmoid units and input_size units.''')
    parser.add_argument('-d', '--dataset', type=str, default='iris',
                        help='''Dataset to use. Working options: iris,
                        breast_cancer, wine, digits''')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='Number of epochs to run the classifier')
    parser.add_argument('-s', '--seed', type=int, default=42,
                        help='Seed for the data shuffle')
    parser.add_argument('-b', '--batchsize', type=int, default=1,
                        help='Batch size during training')
    parser.add_argument('-l', '--learning-rate', type=float, default=1.0,
                        help='Learning rate')
    return parser.parse_args(args)
def plot_digit(x):
    """Reshape a flattened square image into a 2-D (side, side) array.

    FIX: the original computed ``x.reshape(shape, shape)`` but discarded
    the result (numpy's reshape returns a new view, it does not mutate),
    making the function a no-op that returned None. The reshaped array
    is now returned.

    Parameters
    ----------
    x : ndarray, shape (1, n*n)
        A single flattened square image (e.g. an 8x8 digit as (1, 64)).

    Returns
    -------
    ndarray, shape (n, n)
        The image in 2-D form.
    """
    side = int(np.sqrt(x.shape[1]))
    return x.reshape(side, side)
def main(dataset, architecture, epochs, seed, batchsize, learning_rate):
    """Train an autoencoder on a scikit-learn dataset through HyperStream.

    Wires three HyperStream tools into in-memory streams: a dataset
    source, a mini-batcher, and the unsupervised (autoencoder) trainer;
    then prints per-epoch / per-minibatch reconstruction scores and, for
    the 2-D-bottleneck 'digits' case, saves a latent-space image grid.

    Parameters
    ----------
    dataset : str
        Suffix of a sklearn loader; resolved as ``datasets.load_<dataset>``
        (e.g. 'iris', 'digits').
    architecture : str
        Autoencoder specification string passed to MyKerasUnsupervised.
    epochs : int
        Number of passes over the dataset generated by the dataset tool.
    seed : int
        Seed for the dataset shuffle.
    batchsize : int
        Mini-batch size for the minibatch tool.
    learning_rate : float
        Learning rate handed to the model wrapper.
    """
    hs = HyperStream(loglevel=30)
    print(hs)
    print([p.channel_id_prefix for p in hs.config.plugins])
    # In-memory channel; all streams below are created in it.
    M = hs.channel_manager.memory
    data = getattr(datasets, 'load_{}'.format(dataset))()
    data_tool = hs.plugins.sklearn.tools.dataset(data, shuffle=True,
                                                 epochs=epochs, seed=seed)
    data_stream = M.get_or_create_stream('dataset')
    model = MyKerasUnsupervised(architecture=architecture, lr=learning_rate)
    unsupervised_tool = hs.plugins.sklearn.tools.unsupervised(model)
    unsupervised_stream = M.get_or_create_stream('unsupervised')
    # Time interval from the UNIX epoch until one hour ago; the tools fill
    # the streams over this interval.
    now = datetime.utcnow().replace(tzinfo=UTC)
    now = (now - timedelta(hours=1)).replace(tzinfo=UTC)
    before = datetime.utcfromtimestamp(0).replace(tzinfo=UTC)
    ti = TimeInterval(before, now)
    data_tool.execute(sources=[], sink=data_stream, interval=ti)
    print("Example of a data stream")
    # NOTE(review): `.iteritems().next()` is Python-2-only syntax; under
    # Python 3 this would need `next(iter(...))` -- confirm the target
    # interpreter for this example.
    key, value = data_stream.window().iteritems().next()
    print('[%s]: %s' % (key, value))
    mini_batch_tool = hs.plugins.sklearn.tools.minibatch(batchsize=batchsize)
    mini_batch_stream = M.get_or_create_stream('mini_batch')
    mini_batch_tool.execute(sources=[data_stream], sink=mini_batch_stream,
                            interval=ti)
    unsupervised_tool.execute(sources=[mini_batch_stream], sink=unsupervised_stream,
                              interval=ti)
    # Collect the per-batch reconstruction scores emitted by the trainer.
    scores = []
    for key, value in unsupervised_stream.window():
        scores.append(value['score'])
    # The data is repeated the number of epochs. This makes the mini-batches to
    # cycle and contain data from the begining and end of the dataset. This
    # makes possible that the number of scores is not divisible by epochs.
    if batchsize == 1:
        print("Test scores per epoch")
        # With batchsize 1 the score count is exactly epochs * n_samples,
        # so it reshapes cleanly into one row per epoch.
        scores = np.array(scores).reshape(epochs, -1)
        print(scores.mean(axis=1).round(decimals=2))
    else:
        scores = np.array(scores).reshape(1,-1)
        print("Test scores per minibatch (cyclic)")
        print(scores.round(decimals=2))
    # Only for a 2-unit bottleneck on 'digits' can the latent space be
    # swept on a 2-D grid and rendered as images.
    if dataset == 'digits' and model.decoder.input_shape[1] == 2:
        minmax = 5
        image = generate_hidden_images(model, digit_size=8, n=15, minmax=minmax)
        fig = plt.figure(figsize=(6,6))
        ax = fig.add_subplot(111)
        ax.imshow(image, extent = [-minmax, minmax, -minmax, minmax], cmap='Greys')
        fig.savefig('autoencoder_{}.svg'.format(architecture))
if __name__ == '__main__':
    # Parse CLI options and forward them as keyword arguments to main().
    cli_args = get_arguments()
    main(**vars(cli_args))