-
Notifications
You must be signed in to change notification settings - Fork 0
/
3.4.dl_tf_advance_hyperparameter_tuning_keras_tuner.py
94 lines (72 loc) · 3.61 KB
/
3.4.dl_tf_advance_hyperparameter_tuning_keras_tuner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#%% TBD (search is failing): Hyperparameter tuning for Multiclass classification
#reference https://github.com/keras-team/keras-tuner
# Restart the Spyder
import pandas as pd
import numpy as np
import tensorflow as tf
from kerastuner.tuners import RandomSearch
import os
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import gc; gc.enable()
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Use the fully qualified option name: the bare 'precision' pattern is
# ambiguous on pandas releases that also define 'styler.format.precision'
# and raises OptionError there.
pd.set_option('display.precision', 2)

# Relative paths used below (helper script, data, model directory) are
# resolved against this working directory.
os.chdir("D:\\trainings\\tensorflow")

# Execute the shared helper script in this namespace. The context manager
# closes the file handle, which the bare open(...).read() never did.
# NOTE(review): exec runs the file verbatim - only acceptable because
# tf_CommonUtils.py is a trusted local file; a regular import would be safer.
with open(os.path.abspath('tf_CommonUtils.py')) as common_utils_file:
    exec(common_utils_file.read())

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

# Fix the random seeds (NumPy and TensorFlow) for reproducibility.
seed = 123
np.random.seed(seed)
tf.compat.v1.set_random_seed(seed)
# Name of the target column and the mini-batch size used for training.
LABEL = "Survived"
batch_size = 8

# Read data from https://www.kaggle.com/c/titanic/data
data = pd.read_csv("./data/kaggle_titanic_train_EncodedScaled.csv")
data.info()

# Independent features: every column except the label
# (np.setdiff1d returns the names sorted and de-duplicated).
ar_independent_features = np.setdiff1d(data.columns, LABEL)
len_fea = len(ar_independent_features)

# Segregate the rows: 85% for training, 15% held out for testing.
training_set, test_set = train_test_split(data, test_size=0.15)
# Quick interactive look at the training split (no effect when run as a script).
training_set.shape
training_set.head(2)
# Model factory handed to the tuner
def build_model_for_tunning(hp):
    """Build and compile one candidate network for the hyperparameter search.

    Args:
        hp: hyperparameter container supplied by the tuner; it is asked for
            the hidden layer width ('units') and the Adam 'learning_rate'.

    Returns:
        A compiled tf.keras Sequential model with two ReLU hidden layers and
        a 2-way softmax output.
    """
    def sample_units():
        # Width ranges from len_fea to 10 * len_fea in steps of len_fea.
        # NOTE(review): both hidden layers request the same name 'units', so
        # they presumably always get the same width - confirm this is intended.
        return hp.Int('units', min_value=len_fea, max_value=10 * len_fea, step=len_fea)

    network = tf.keras.models.Sequential([
        tf.keras.layers.Dense(units=sample_units(), input_shape=(len_fea,), activation=tf.nn.relu),
        tf.keras.layers.Dense(units=sample_units(), activation=tf.nn.relu),
        tf.keras.layers.Dense(2, activation=tf.nn.softmax),
    ])

    # Compile with a tunable learning rate; labels are integer class ids.
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
# def build_model_for_tunning
# Random search over the space defined in build_model_for_tunning.
# The objective is the validation accuracy: ranking trials by training
# accuracy rewards configurations that merely overfit the training rows.
# NOTE(review): results already stored under this directory/project may be
# picked up again by the tuner - clear the folder after changing the objective
# (TODO confirm).
tuner = RandomSearch(
    build_model_for_tunning,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,  # to get results faster, set 1 (ideal 3): a single round of training per model configuration
    directory='./model/kaggle_titanic/',
    project_name='kaggle_titanic_tunning')

# Print a summary of the search space
tuner.search_space_summary()

# Run the search. The keyword arguments are forwarded to model.fit:
# validation_split holds out the last 20% of the training rows so that
# 'val_accuracy' exists for the objective. The former use_multiprocessing=False
# was dropped: it is the default where supported and is rejected by newer
# Keras versions of fit().
tuner.search(
    training_set[ar_independent_features].values,
    training_set[LABEL].values,
    epochs=10,
    batch_size=batch_size,
    validation_split=0.2)

# Summary of the results
tuner.results_summary()
# Best model found by the search
model = tuner.get_best_models(num_models=1)[0]

# Evaluate on test data
model.evaluate(test_set[ar_independent_features].values, test_set[LABEL].values, verbose = 0)
# loss value & metrics values recorded from an earlier run: [0.45, 0.79]

# Making predictions: one row of class probabilities per test sample
predictions = model.predict(x=test_set[ar_independent_features].values)

# Predicted class = index of the highest probability in each row.
# A single vectorized argmax replaces the per-row np.append loop, which
# re-allocated the whole result array on every iteration.
predictions_number = np.argmax(predictions, axis=1)

# Statistics are also available as follows
print("Overall Accuracy is ", round(accuracy_score(test_set[LABEL].values, predictions_number), 2),", Kappa is ", round(abs(cohen_kappa_score(test_set[LABEL].values, predictions_number)), 2))
# Recorded from an earlier run: Overall Accuracy is 0.76 , Kappa is 0.51

# Release the large objects and force a garbage-collection pass
del(data, training_set, test_set, predictions, predictions_number); gc.collect()