Avik-Jain · upeskartik · Aug 31, 2018
diff --git a/Code/Day 1_Data PreProcessing.md b/Code/Day 1_Data PreProcessing.md
@@ -20,6 +20,10 @@ Y = dataset.iloc[ : , 3].values
 ## Step 3: Handling the missing data
 ```python
 from sklearn.preprocessing import Imputer
+#Imputer fills in missing values using one of three strategies:
+#1. "mean": replace missing entries with the mean of the column.
+#2. "median": replace missing entries with the median of the column.
+#3. "most_frequent": replace missing entries with the most frequent value in the column.
 imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
 imputer = imputer.fit(X[ : , 1:3])
 X[ : , 1:3] = imputer.transform(X[ : , 1:3])
@@ -46,6 +50,7 @@ X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, ra
 ## Step 6: Feature Scaling
 ```python
 from sklearn.preprocessing import StandardScaler
+#StandardScaler standardizes each feature to zero mean and unit standard deviation. NOTE: fit it on the training set only; the test set should be scaled with sc_X.transform(), not fit_transform().
 sc_X = StandardScaler()
 X_train = sc_X.fit_transform(X_train)
 X_test = sc_X.fit_transform(X_test)