forked from bnsreenu/python_for_microscopists
-
Notifications
You must be signed in to change notification settings - Fork 0
/
062-066-ML_06_04_TRAIN_ML_segmentation_All_filters_RForest.py
222 lines (160 loc) · 7.69 KB
/
062-066-ML_06_04_TRAIN_ML_segmentation_All_filters_RForest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python
__author__ = "Sreenivas Bhattiprolu"
__license__ = "Feel free to copy, I appreciate if you acknowledge Python for Microscopists"
# https://www.youtube.com/watch?v=OUCwt8loM6s
# https://www.youtube.com/watch?v=6yW31TT6-wA
# https://www.youtube.com/watch?v=XmRKkMjD8hM
# https://www.youtube.com/watch?v=FT64YzD1KQI
# https://www.youtube.com/watch?v=f205EmfXi84&t=324s
"""
@author: Sreenivas Bhattiprolu
"""
import numpy as np
import cv2
import pandas as pd
#img = cv2.imread('BSE_Image.jpg')
img = cv2.imread('images/Train_images/Sandstone_Versa0000.tif')
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#Here, if you have multichannel image then extract the right channel instead of converting the image to grey.
#For example, if DAPI contains nuclei information, extract the DAPI channel image first.
#Multiple images can be used for training. For that, you need to concatenate the data
#Save original image pixels into a data frame. This is our Feature #1.
img2 = img.reshape(-1)
df = pd.DataFrame()
df['Original Image'] = img2
#Generate Gabor features
num = 1 #To count numbers up in order to give Gabor features a lable in the data frame
kernels = []
for theta in range(2): #Define number of thetas
theta = theta / 4. * np.pi
for sigma in (1, 3): #Sigma with 1 and 3
for lamda in np.arange(0, np.pi, np.pi / 4): #Range of wavelengths
for gamma in (0.05, 0.5): #Gamma values of 0.05 and 0.5
gabor_label = 'Gabor' + str(num) #Label Gabor columns as Gabor1, Gabor2, etc.
# print(gabor_label)
ksize=9
kernel = cv2.getGaborKernel((ksize, ksize), sigma, theta, lamda, gamma, 0, ktype=cv2.CV_32F)
kernels.append(kernel)
#Now filter the image and add values to a new column
fimg = cv2.filter2D(img2, cv2.CV_8UC3, kernel)
filtered_img = fimg.reshape(-1)
df[gabor_label] = filtered_img #Labels columns as Gabor1, Gabor2, etc.
print(gabor_label, ': theta=', theta, ': sigma=', sigma, ': lamda=', lamda, ': gamma=', gamma)
num += 1 #Increment for gabor column label
########################################
#Gerate OTHER FEATURES and add them to the data frame
#CANNY EDGE
edges = cv2.Canny(img, 100,200) #Image, min and max values
edges1 = edges.reshape(-1)
df['Canny Edge'] = edges1 #Add column to original dataframe
from skimage.filters import roberts, sobel, scharr, prewitt
#ROBERTS EDGE
edge_roberts = roberts(img)
edge_roberts1 = edge_roberts.reshape(-1)
df['Roberts'] = edge_roberts1
#SOBEL
edge_sobel = sobel(img)
edge_sobel1 = edge_sobel.reshape(-1)
df['Sobel'] = edge_sobel1
#SCHARR
edge_scharr = scharr(img)
edge_scharr1 = edge_scharr.reshape(-1)
df['Scharr'] = edge_scharr1
#PREWITT
edge_prewitt = prewitt(img)
edge_prewitt1 = edge_prewitt.reshape(-1)
df['Prewitt'] = edge_prewitt1
#GAUSSIAN with sigma=3
from scipy import ndimage as nd
gaussian_img = nd.gaussian_filter(img, sigma=3)
gaussian_img1 = gaussian_img.reshape(-1)
df['Gaussian s3'] = gaussian_img1
#GAUSSIAN with sigma=7
gaussian_img2 = nd.gaussian_filter(img, sigma=7)
gaussian_img3 = gaussian_img2.reshape(-1)
df['Gaussian s7'] = gaussian_img3
#MEDIAN with sigma=3
median_img = nd.median_filter(img, size=3)
median_img1 = median_img.reshape(-1)
df['Median s3'] = median_img1
#VARIANCE with size=3
variance_img = nd.generic_filter(img, np.var, size=3)
variance_img1 = variance_img.reshape(-1)
df['Variance s3'] = variance_img1 #Add column to original dataframe
######################################
#Now, add a column in the data frame for the Labels
#For this, we need to import the labeled image
labeled_img = cv2.imread('images/Train_masks/Sandstone_Versa0000.tif')
#Remember that you can load an image with partial labels
#But, drop the rows with unlabeled data
labeled_img = cv2.cvtColor(labeled_img, cv2.COLOR_BGR2GRAY)
labeled_img1 = labeled_img.reshape(-1)
df['Labels'] = labeled_img1
print(df.head())
#df.to_csv("Gabor.csv")
#########################################################
#Define the dependent variable that needs to be predicted (labels)
Y = df["Labels"].values
#Define the independent variables
X = df.drop(labels = ["Labels"], axis=1)
#Split data into train and test to verify accuracy after fitting the model.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=20)
# Import the model we are using
#RandomForestRegressor is for regression type of problems.
#For classification we use RandomForestClassifier.
#Both yield similar results except for regressor the result is float
#and for classifier it is an integer.
from sklearn.ensemble import RandomForestClassifier
# Instantiate model with n number of decision trees
model = RandomForestClassifier(n_estimators = 100, random_state = 42)
###
#SVM
# Train the Linear SVM to compare against Random Forest
#SVM will be slower than Random Forest.
#Make sure to comment out Fetaure importances lines of code as it does not apply to SVM.
#from sklearn.svm import LinearSVC
#model = LinearSVC(max_iter=100) #Default of 100 is not converging
# Train the model on training data
model.fit(X_train, y_train)
# verify number of trees used. If not defined above.
#print('Number of Trees used : ', model.n_estimators)
#STEP 8: TESTING THE MODEL BY PREDICTING ON TEST DATA
#AND CALCULATE THE ACCURACY SCORE
#First test prediction on the training data itself. SHould be good.
prediction_test_train = model.predict(X_train)
#Test prediction on testing data.
prediction_test = model.predict(X_test)
#.predict just takes the .predict_proba output and changes everything
#to 0 below a certain threshold (usually 0.5) respectively to 1 above that threshold.
#In this example we have 4 labels, so the probabilities will for each label stored separately.
#
#prediction_prob_test = model.predict_proba(X_test)
#Let us check the accuracy on test data
from sklearn import metrics
#Print the prediction accuracy
#First check the accuracy on training data. This will be higher than test data prediction accuracy.
print ("Accuracy on training data = ", metrics.accuracy_score(y_train, prediction_test_train))
#Check accuracy on test dataset. If this is too low compared to train it indicates overfitting on training data.
print ("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))
#This part commented out for SVM testing. Uncomment for random forest.
#One amazing feature of Random forest is that it provides us info on feature importances
# Get numerical feature importances
#importances = list(model.feature_importances_)
#Let us print them into a nice format.
feature_list = list(X.columns)
feature_imp = pd.Series(model.feature_importances_,index=feature_list).sort_values(ascending=False)
print(feature_imp)
#You can store the model for future use. In fact, this is how you do machine elarning
#Train on training images, validate on test images and deploy the model on unknown images.
import pickle
#Save the trained model as pickle string to disk for future use
filename = "sandstone_model"
pickle.dump(model, open(filename, 'wb'))
#To test the model on future datasets
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.predict(X)
segmented = result.reshape((img.shape))
from matplotlib import pyplot as plt
plt.imshow(segmented, cmap ='jet')
plt.imsave('segmented_rock_RF_100_estim.jpg', segmented, cmap ='jet')