import numpy as np
import argparse
from datetime import datetime, timedelta
from sklearn import datasets
from sklearn.preprocessing import label_binarize
from hyperstream import HyperStream, TimeInterval
from hyperstream.utils import UTC
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD


class MyKerasClassifier(object):
    """A modification of Keras's scikit-learn `KerasClassifier` wrapper that
    keeps the labels with their original values.

    Implementation of the scikit-learn classifier API for Keras.
    """
    def __init__(self, architecture='lr', lr=1):
        self.model = None
        self.architecture = architecture
        self.lr = lr
        self.classes = None  # set on the first call to fit()

    def fit(self, x, y, classes=None, **kwargs):
        """Constructs a new model with `create_model` and fits it to `(x, y)`.

        # Arguments
            x : array-like, shape `(n_samples, n_features)`
                Training samples, where n_samples is the number of samples
                and n_features is the number of features.
            y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
                True labels for `x`.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `Sequential.fit`

        # Returns
            history : object
                details about the training history at each epoch.
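
        Note (illustrative): if `y` arrives as a column vector of integer
        labels, it is one-hot encoded against `self.classes` before fitting;
        e.g. y = [[0], [2]] with classes [0, 1, 2] becomes
        [[1, 0, 0], [0, 0, 1]].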
"""
        if classes is not None:
            self.classes = classes
        # check y.ndim first: a flat label vector has no second dimension
        if y.ndim == 1 or y.shape[1] <= 1:
            y = label_binarize(y, classes=self.classes)
if self.model is None:
self.model = self.create_model(input_dim=x.shape[1],
output_size=y.shape[1],
architecture=self.architecture,
lr=self.lr)
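        # One call to fit() performs a single optimisation step: the whole
        # mini-batch is passed as one Keras batch for one epoch.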
return self.model.fit(x, y, batch_size=x.shape[0], epochs=1, verbose=0)
    def create_model(self, input_dim=1, output_size=1, optimizer='rmsprop',
                     init='glorot_uniform', lr=1, momentum=0.0, decay=0.0,
                     nesterov=False, architecture='lr'):
        """
        Parameters
        ----------
        architecture : string
            'lr' for logistic regression, or an 'mlp...' specification such
            as 'mlp100', 'mlp100d' or 'mlp100d100d', where a number adds a
            Dense layer of that size, 'd' a Dropout(0.5), 's' a sigmoid
            activation and 'm' the softmax output layer.
        """
model = Sequential()
previous_layer = input_dim
if architecture == 'lr':
model.add(Dense(output_size, input_shape=(input_dim,),
kernel_initializer=init,
activation='softmax'))
elif architecture.startswith('mlp'):
architecture = architecture[3:]
while len(architecture) > 0:
if architecture[0] == 'd':
model.add(Dropout(0.5))
architecture = architecture[1:]
elif architecture[0] == 's':
model.add(Activation('sigmoid'))
architecture = architecture[1:]
elif architecture[0] == 'm':
model.add(Dense(output_size, kernel_initializer=init))
model.add(Activation('softmax'))
architecture = architecture[1:]
elif architecture[0].isdigit():
i = 1
while len(architecture) > i and architecture[i].isdigit():
i += 1
actual_layer = int(architecture[:i])
model.add(Dense(actual_layer, input_dim=previous_layer,
kernel_initializer=init))
architecture = architecture[i:]
previous_layer = actual_layer
                else:
                    raise ValueError('Invalid architecture '
                                     'specification: {}'.format(architecture))
        else:
            raise ValueError('Invalid architecture '
                             'specification: {}'.format(architecture))
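        # Note: lr, momentum, decay and nesterov only take effect when
        # optimizer='sgd'; any other optimizer string (e.g. the default
        # 'rmsprop') is handed to model.compile() unchanged, so the learning
        # rate is ignored in that case.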
if optimizer == 'sgd':
optimizer = SGD(lr=lr, momentum=momentum, decay=decay,
nesterov=nesterov)
loss = 'categorical_crossentropy'
model.compile(loss=loss, optimizer=optimizer,
metrics=['acc'])
return model
    def predict(self, x):
        # returns the per-class probabilities predicted by the network
        return self.model.predict(x, verbose=0)

    def score(self, x, y):
        # mean accuracy of the predicted class indices against y
        pred = self.model.predict_classes(x, verbose=0).reshape(-1, 1)
        return np.mean(pred == y)
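
# Minimal standalone sketch of MyKerasClassifier outside HyperStream
# (assumed usage for illustration; 'mlp10m' is an arbitrary architecture):
#
#   from sklearn import datasets
#   data = datasets.load_iris()
#   clf = MyKerasClassifier(architecture='mlp10m', lr=0.1)
#   y = data.target.reshape(-1, 1)
#   clf.fit(data.data, y, classes=np.unique(data.target))
#   print(clf.score(data.data, y))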

def get_arguments():
    parser = argparse.ArgumentParser(
        description='Train a Keras classifier on a scikit-learn dataset '
                    'with HyperStream.')
    parser.add_argument('-c', '--classifier', type=str,
                        default='lr',
                        help='''Classifier to use. Working options: lr for
                        logistic regression. To specify a multilayer
                        perceptron use the following convention: mlp100dm
                        for an MLP with 100 hidden units, dropout at 0.5 and
                        softmax activation; mlp30ds40m for an MLP with 30
                        hidden units, dropout at 0.5, sigmoid activation, 40
                        hidden units and softmax activation (see the example
                        invocations below)''')
parser.add_argument('-d', '--dataset', type=str, default='iris',
help='''Dataset to use. Working options: iris,
breast_cancer, wine, digits''')
parser.add_argument('-e', '--epochs', type=int, default=10,
help='Number of epochs to run the classifier')
parser.add_argument('-s', '--seed', type=int, default=42,
help='Seed for the data shuffle')
parser.add_argument('-b', '--batchsize', type=int, default=1,
help='Batch size during training')
parser.add_argument('-l', '--learning-rate', type=float, default=1.0,
help='Learning rate')
return parser.parse_args()
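
# Example invocations (illustrative; flags as defined in get_arguments):
#   python example_classifier_keras.py -c lr -d iris -e 10 -b 1 -l 1.0
#   python example_classifier_keras.py -c mlp30ds40m -d digits -e 5 -b 32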
def main(dataset, classifier, epochs, seed, batchsize, learning_rate):
hs = HyperStream(loglevel=30)
print(hs)
print([p.channel_id_prefix for p in hs.config.plugins])
    M = hs.channel_manager.memory

    # Stream the chosen sklearn dataset, shuffled and repeated for the given
    # number of epochs, into a memory channel
    data = getattr(datasets, 'load_{}'.format(dataset))()
    data_tool = hs.plugins.sklearn.tools.dataset(data, shuffle=True,
                                                 epochs=epochs, seed=seed)
    data_stream = M.get_or_create_stream('dataset')

    model = MyKerasClassifier(architecture=classifier, lr=learning_rate)
    classifier_tool = hs.plugins.sklearn.tools.classifier(model)
    classifier_stream = M.get_or_create_stream('classifier')
    # Process everything from the Unix epoch until one hour ago
    now = datetime.utcnow().replace(tzinfo=UTC) - timedelta(hours=1)
    before = datetime.utcfromtimestamp(0).replace(tzinfo=UTC)
    ti = TimeInterval(before, now)

    data_tool.execute(sources=[], sink=data_stream, interval=ti)
    print("Example of a data stream")
    key, value = next(data_stream.window().iteritems())
    print('[%s]: %s' % (key, value))
    # Group the dataset stream into mini-batches, then update the classifier
    # on each mini-batch and record a score
    mini_batch_tool = hs.plugins.sklearn.tools.minibatch(batchsize=batchsize)
    mini_batch_stream = M.get_or_create_stream('mini_batch')
    mini_batch_tool.execute(sources=[data_stream], sink=mini_batch_stream,
                            interval=ti)
    classifier_tool.execute(sources=[mini_batch_stream], sink=classifier_stream,
                            interval=ti)
scores = []
for key, value in classifier_stream.window():
scores.append(value['score'])
    # The data is repeated for the given number of epochs, which makes the
    # mini-batches cycle: a mini-batch can contain data from the end of one
    # pass and the beginning of the next, so the number of scores is not
    # necessarily divisible by the number of epochs.
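    # For example (illustrative numbers): iris has 150 samples, so epochs=10
    # with batchsize=1 yields 1500 scores, reshaped below to (10, 150); with
    # batchsize=32 an epoch spans 150/32 = 4.6875 mini-batches, so batches
    # straddle epoch boundaries.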
if batchsize == 1:
print("Test scores per epoch")
scores = np.array(scores).reshape(epochs, -1)
print(scores.mean(axis=1).round(decimals=2))
    else:
        print("Test scores per minibatch (cyclic)")
        scores = np.array(scores).reshape(1, -1)
        print(scores.round(decimals=2))

if __name__ == '__main__':
arguments = get_arguments()
main(**vars(arguments))