From d08aff1e9ec1f8be403d069b2ab8a74cfdf58a46 Mon Sep 17 00:00:00 2001
From: Anuragsarkar12 <sarkaranurag2321@gmail.com>
Date: Tue, 21 May 2024 20:59:54 +0530
Subject: [PATCH] Performed feature selection

---
 .../diabetesclassification-checkpoint.ipynb   | 213 +++++++++++-------
 .../diabetesclassification.ipynb              | 213 +++++++++++-------
 2 files changed, 258 insertions(+), 168 deletions(-)

diff --git a/Diabetes Classification/.ipynb_checkpoints/diabetesclassification-checkpoint.ipynb b/Diabetes Classification/.ipynb_checkpoints/diabetesclassification-checkpoint.ipynb
index 0f854e2..fc0e11b 100644
--- a/Diabetes Classification/.ipynb_checkpoints/diabetesclassification-checkpoint.ipynb	
+++ b/Diabetes Classification/.ipynb_checkpoints/diabetesclassification-checkpoint.ipynb	
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 2,
    "id": "312c95a1",
    "metadata": {},
    "outputs": [],
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 3,
    "id": "aea1b45b",
    "metadata": {},
    "outputs": [],
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 4,
    "id": "2ac5c17b",
    "metadata": {},
    "outputs": [
@@ -275,7 +275,7 @@
        "[5132 rows x 11 columns]"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -286,7 +286,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 5,
    "id": "c4484360",
    "metadata": {},
    "outputs": [
@@ -408,7 +408,7 @@
        "4           4   50      F   24   3.6  1.3  0.9  2.1  50.0  2.0          0"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -427,7 +427,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 6,
    "id": "ea7c6dcb",
    "metadata": {},
    "outputs": [
@@ -437,7 +437,7 @@
        "(5132, 11)"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -456,7 +456,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 7,
    "id": "a3c2ff78",
    "metadata": {},
    "outputs": [
@@ -499,7 +499,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 8,
    "id": "a9cc7614",
    "metadata": {},
    "outputs": [],
@@ -510,7 +510,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 9,
    "id": "cdee3c52",
    "metadata": {
     "scrolled": true
@@ -554,7 +554,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 10,
    "id": "2f353609",
    "metadata": {},
    "outputs": [
@@ -572,7 +572,7 @@
        "Name: Age, dtype: float64"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -583,7 +583,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 11,
    "id": "b7887949",
    "metadata": {},
    "outputs": [
@@ -601,7 +601,7 @@
        "Name: BMI, dtype: float64"
       ]
      },
-     "execution_count": 45,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -612,7 +612,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 12,
    "id": "fdf468b3",
    "metadata": {},
    "outputs": [
@@ -630,7 +630,7 @@
        "Name: Chol, dtype: float64"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -641,7 +641,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 13,
    "id": "7877549d",
    "metadata": {},
    "outputs": [
@@ -659,7 +659,7 @@
        "Name: TG, dtype: float64"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -670,7 +670,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 14,
    "id": "71ec5fcb",
    "metadata": {},
    "outputs": [
@@ -688,7 +688,7 @@
        "Name: HDL, dtype: float64"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -699,7 +699,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 15,
    "id": "bbd3682c",
    "metadata": {},
    "outputs": [
@@ -717,7 +717,7 @@
        "Name: LDL, dtype: float64"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -728,7 +728,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 16,
    "id": "c4603e52",
    "metadata": {},
    "outputs": [
@@ -746,7 +746,7 @@
        "Name: Cr, dtype: float64"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -757,7 +757,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 17,
    "id": "f98b84c7",
    "metadata": {},
    "outputs": [
@@ -775,7 +775,7 @@
        "Name: BUN, dtype: float64"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -794,7 +794,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 18,
    "id": "a9add765",
    "metadata": {},
    "outputs": [
@@ -822,7 +822,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 19,
    "id": "752e448c",
    "metadata": {},
    "outputs": [],
@@ -835,7 +835,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 20,
    "id": "a8441035",
    "metadata": {},
    "outputs": [],
@@ -846,7 +846,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 21,
    "id": "378ecf72",
    "metadata": {},
    "outputs": [
@@ -892,7 +892,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 22,
    "id": "39477c2a",
    "metadata": {},
    "outputs": [
@@ -953,7 +953,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 23,
    "id": "651dd699",
    "metadata": {},
    "outputs": [],
@@ -969,7 +969,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 24,
    "id": "e1801370",
    "metadata": {},
    "outputs": [],
@@ -979,7 +979,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 25,
    "id": "eb6127cc",
    "metadata": {},
    "outputs": [
@@ -1089,7 +1089,7 @@
        "4   50  24.0   3.6  1.3  0.9  2.1  50.0  2.0          0"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1100,7 +1100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 26,
    "id": "12304d13",
    "metadata": {},
    "outputs": [],
@@ -1111,7 +1111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 27,
    "id": "dfd08af6",
    "metadata": {},
    "outputs": [],
@@ -1144,7 +1144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 28,
    "id": "237cd728",
    "metadata": {},
    "outputs": [
@@ -1154,7 +1154,7 @@
        "['Chol', 'Cr', 'BUN']"
       ]
      },
-     "execution_count": 62,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1173,17 +1173,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 29,
    "id": "40e0c236",
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_new_kbest=data_new.drop(['Chol','Cr','BUN'],axis=1)"
+    "data_new_kbest=data_new.drop(['Chol','Cr','BUN'],axis=1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 30,
    "id": "6f5836ec",
    "metadata": {},
    "outputs": [
@@ -1338,7 +1338,7 @@
        "[5132 rows x 6 columns]"
       ]
      },
-     "execution_count": 64,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1357,33 +1357,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 40,
    "id": "67e27ab3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_f= data_new_kbest['Diagnosis']\n",
+    "X_f=data_new_kbest.drop(['Diagnosis'],axis=1)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_f, y_f, test_size=0.2, random_state=42)\n",
+    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "param_grid = {\n",
+    "    'n_estimators': [100, 200, 500],\n",
+    "    'max_depth': [3, 5, 8],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'max_features': [5, 'sqrt', 'log2']\n",
+    "}\n",
+    "search = RandomizedSearchCV(RandomForestClassifier(), param_grid, cv=5) \n",
+    "search.fit(X_train, y_train)\n",
+    "best_model = search.best_estimator_\n",
+    "best_score = search.best_score_\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "476a4ae1",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Accuracy: 0.8198636806231743\n",
-      "[[536  68]\n",
-      " [117 306]]\n"
+      "Best Model: RandomForestClassifier(max_depth=8, max_features='log2', min_samples_leaf=2,\n",
+      "                       n_estimators=200)\n",
+      "Best Score: 0.8285018270401949\n"
      ]
     }
    ],
    "source": [
-    "y_f= data_new_kbest['Diagnosis']\n",
-    "X_f=data_new_kbest.drop(['Diagnosis'],axis=1)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_f, y_f, test_size=0.2, random_state=42)\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "rfc = RandomForestClassifier(n_estimators=100)  \n",
-    "rfc.fit(X_train, y_train)\n",
-    "y_pred = rfc.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "cm = confusion_matrix(y_test, y_pred)\n",
-    "print(cm)\n",
-    "\n"
+    "\n",
+    "print(\"Best Model:\", search.best_estimator_)\n",
+    "print(\"Best Score:\", search.best_score_)\n"
    ]
   },
   {
@@ -1391,14 +1409,16 @@
    "id": "8e6b9595",
    "metadata": {},
    "source": [
-    "Applying a random forest classifier on the dataset obtained by using f-statistic in selectkbest, we also bobserve the model accuracy and the confusion matrix"
+    "Applying a random forest classifier along with hyperparameter tuning on the dataset obtained by using the f-statistic in SelectKBest, we also observe the model accuracy"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 32,
    "id": "f63ffb4d",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1433,7 +1453,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 33,
    "id": "22dbb42e",
    "metadata": {},
    "outputs": [],
@@ -1443,7 +1463,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 42,
    "id": "d96d1966",
    "metadata": {},
    "outputs": [
@@ -1634,7 +1654,7 @@
        "[5132 rows x 9 columns]"
       ]
      },
-     "execution_count": 75,
+     "execution_count": 42,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1653,32 +1673,49 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 43,
    "id": "084425a3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_rfecv= data_new_rfecv['Diagnosis']\n",
+    "X_rfecv=data_new_rfecv.drop(['Diagnosis'],axis=1)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_rfecv, y_rfecv, test_size=0.2, random_state=42)\n",
+    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "param_grid = {\n",
+    "    'n_estimators': [100, 200, 500],\n",
+    "    'max_depth': [3, 5, 8],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'max_features': [5, 'sqrt', 'log2']\n",
+    "}\n",
+    "search = RandomizedSearchCV(RandomForestClassifier(), param_grid, cv=5) \n",
+    "search.fit(X_train, y_train)\n",
+    "best_model = search.best_estimator_\n",
+    "best_score = search.best_score_\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "8ce4d337",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Accuracy: 0.818889970788705\n",
-      "[[539  65]\n",
-      " [121 302]]\n"
+      "Best Model: RandomForestClassifier(max_depth=5, max_features='log2', n_estimators=200)\n",
+      "Best Score: 0.8294762484774665\n"
      ]
     }
    ],
    "source": [
-    "y_rfecv= data_new_rfecv['Diagnosis']\n",
-    "X_rfecv=data_new_rfecv.drop(['Diagnosis'],axis=1)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_rfecv, y_rfecv, test_size=0.2, random_state=42)\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "rfc = RandomForestClassifier(n_estimators=100)  \n",
-    "rfc.fit(X_train, y_train)\n",
-    "y_pred = rfc.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "cm = confusion_matrix(y_test, y_pred)\n",
-    "print(cm)"
+    "print(\"Best Model:\", search.best_estimator_)\n",
+    "print(\"Best Score:\", search.best_score_)\n"
    ]
   },
   {
@@ -1686,7 +1723,7 @@
    "id": "a52ed238",
    "metadata": {},
    "source": [
-    "Applying a random forest classifier on the dataset obtained by using RFECV, we also bobserve the model accuracy and the confusion matrix"
+    "Applying a random forest classifier along with hyperparameter tuning on the dataset obtained by using RFECV, we also observe the model accuracy "
    ]
   },
   {
@@ -1694,16 +1731,24 @@
    "id": "59349ad6",
    "metadata": {},
    "source": [
-    "Since, we observe that the accuracy score of the model trained on datset generated by selectkbest is slightly higher that that trained by dataset generate by rfecv, hence we will use the 5 features selected by selectkbest as they have the most significant relationship with diagnosis"
+    "Thus, we can compare the cross-validated accuracy of two random forest classifier (RFC) models with hyperparameter tuning, each trained on a dataset produced by a different feature selection method (SelectKBest and RFECV)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "id": "82916004",
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acb0a472",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/Diabetes Classification/diabetesclassification.ipynb b/Diabetes Classification/diabetesclassification.ipynb
index 0f854e2..fc0e11b 100644
--- a/Diabetes Classification/diabetesclassification.ipynb	
+++ b/Diabetes Classification/diabetesclassification.ipynb	
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 2,
    "id": "312c95a1",
    "metadata": {},
    "outputs": [],
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 3,
    "id": "aea1b45b",
    "metadata": {},
    "outputs": [],
@@ -47,7 +47,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 4,
    "id": "2ac5c17b",
    "metadata": {},
    "outputs": [
@@ -275,7 +275,7 @@
        "[5132 rows x 11 columns]"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -286,7 +286,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 5,
    "id": "c4484360",
    "metadata": {},
    "outputs": [
@@ -408,7 +408,7 @@
        "4           4   50      F   24   3.6  1.3  0.9  2.1  50.0  2.0          0"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -427,7 +427,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 6,
    "id": "ea7c6dcb",
    "metadata": {},
    "outputs": [
@@ -437,7 +437,7 @@
        "(5132, 11)"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -456,7 +456,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 7,
    "id": "a3c2ff78",
    "metadata": {},
    "outputs": [
@@ -499,7 +499,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 8,
    "id": "a9cc7614",
    "metadata": {},
    "outputs": [],
@@ -510,7 +510,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 9,
    "id": "cdee3c52",
    "metadata": {
     "scrolled": true
@@ -554,7 +554,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 10,
    "id": "2f353609",
    "metadata": {},
    "outputs": [
@@ -572,7 +572,7 @@
        "Name: Age, dtype: float64"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -583,7 +583,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 11,
    "id": "b7887949",
    "metadata": {},
    "outputs": [
@@ -601,7 +601,7 @@
        "Name: BMI, dtype: float64"
       ]
      },
-     "execution_count": 45,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -612,7 +612,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 12,
    "id": "fdf468b3",
    "metadata": {},
    "outputs": [
@@ -630,7 +630,7 @@
        "Name: Chol, dtype: float64"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -641,7 +641,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 13,
    "id": "7877549d",
    "metadata": {},
    "outputs": [
@@ -659,7 +659,7 @@
        "Name: TG, dtype: float64"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -670,7 +670,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 14,
    "id": "71ec5fcb",
    "metadata": {},
    "outputs": [
@@ -688,7 +688,7 @@
        "Name: HDL, dtype: float64"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -699,7 +699,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 15,
    "id": "bbd3682c",
    "metadata": {},
    "outputs": [
@@ -717,7 +717,7 @@
        "Name: LDL, dtype: float64"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -728,7 +728,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 16,
    "id": "c4603e52",
    "metadata": {},
    "outputs": [
@@ -746,7 +746,7 @@
        "Name: Cr, dtype: float64"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -757,7 +757,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 17,
    "id": "f98b84c7",
    "metadata": {},
    "outputs": [
@@ -775,7 +775,7 @@
        "Name: BUN, dtype: float64"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -794,7 +794,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 18,
    "id": "a9add765",
    "metadata": {},
    "outputs": [
@@ -822,7 +822,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 19,
    "id": "752e448c",
    "metadata": {},
    "outputs": [],
@@ -835,7 +835,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 20,
    "id": "a8441035",
    "metadata": {},
    "outputs": [],
@@ -846,7 +846,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 21,
    "id": "378ecf72",
    "metadata": {},
    "outputs": [
@@ -892,7 +892,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 22,
    "id": "39477c2a",
    "metadata": {},
    "outputs": [
@@ -953,7 +953,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 23,
    "id": "651dd699",
    "metadata": {},
    "outputs": [],
@@ -969,7 +969,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 24,
    "id": "e1801370",
    "metadata": {},
    "outputs": [],
@@ -979,7 +979,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 25,
    "id": "eb6127cc",
    "metadata": {},
    "outputs": [
@@ -1089,7 +1089,7 @@
        "4   50  24.0   3.6  1.3  0.9  2.1  50.0  2.0          0"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1100,7 +1100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 26,
    "id": "12304d13",
    "metadata": {},
    "outputs": [],
@@ -1111,7 +1111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 27,
    "id": "dfd08af6",
    "metadata": {},
    "outputs": [],
@@ -1144,7 +1144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 28,
    "id": "237cd728",
    "metadata": {},
    "outputs": [
@@ -1154,7 +1154,7 @@
        "['Chol', 'Cr', 'BUN']"
       ]
      },
-     "execution_count": 62,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1173,17 +1173,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 29,
    "id": "40e0c236",
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_new_kbest=data_new.drop(['Chol','Cr','BUN'],axis=1)"
+    "data_new_kbest=data_new.drop(['Chol','Cr','BUN'],axis=1)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 30,
    "id": "6f5836ec",
    "metadata": {},
    "outputs": [
@@ -1338,7 +1338,7 @@
        "[5132 rows x 6 columns]"
       ]
      },
-     "execution_count": 64,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1357,33 +1357,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 40,
    "id": "67e27ab3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_f= data_new_kbest['Diagnosis']\n",
+    "X_f=data_new_kbest.drop(['Diagnosis'],axis=1)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_f, y_f, test_size=0.2, random_state=42)\n",
+    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "param_grid = {\n",
+    "    'n_estimators': [100, 200, 500],\n",
+    "    'max_depth': [3, 5, 8],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'max_features': [5, 'sqrt', 'log2']\n",
+    "}\n",
+    "search = RandomizedSearchCV(RandomForestClassifier(), param_grid, cv=5) \n",
+    "search.fit(X_train, y_train)\n",
+    "best_model = search.best_estimator_\n",
+    "best_score = search.best_score_\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "476a4ae1",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Accuracy: 0.8198636806231743\n",
-      "[[536  68]\n",
-      " [117 306]]\n"
+      "Best Model: RandomForestClassifier(max_depth=8, max_features='log2', min_samples_leaf=2,\n",
+      "                       n_estimators=200)\n",
+      "Best Score: 0.8285018270401949\n"
      ]
     }
    ],
    "source": [
-    "y_f= data_new_kbest['Diagnosis']\n",
-    "X_f=data_new_kbest.drop(['Diagnosis'],axis=1)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_f, y_f, test_size=0.2, random_state=42)\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "rfc = RandomForestClassifier(n_estimators=100)  \n",
-    "rfc.fit(X_train, y_train)\n",
-    "y_pred = rfc.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "cm = confusion_matrix(y_test, y_pred)\n",
-    "print(cm)\n",
-    "\n"
+    "\n",
+    "print(\"Best Model:\", search.best_estimator_)\n",
+    "print(\"Best Score:\", search.best_score_)\n"
    ]
   },
   {
@@ -1391,14 +1409,16 @@
    "id": "8e6b9595",
    "metadata": {},
    "source": [
-    "Applying a random forest classifier on the dataset obtained by using f-statistic in selectkbest, we also bobserve the model accuracy and the confusion matrix"
+    "Applying a random forest classifier along with hyperparameter tuning on the dataset obtained by using the f-statistic in SelectKBest, we also observe the model accuracy"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 32,
    "id": "f63ffb4d",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1433,7 +1453,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
+   "execution_count": 33,
    "id": "22dbb42e",
    "metadata": {},
    "outputs": [],
@@ -1443,7 +1463,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 42,
    "id": "d96d1966",
    "metadata": {},
    "outputs": [
@@ -1634,7 +1654,7 @@
        "[5132 rows x 9 columns]"
       ]
      },
-     "execution_count": 75,
+     "execution_count": 42,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1653,32 +1673,49 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 43,
    "id": "084425a3",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_rfecv= data_new_rfecv['Diagnosis']\n",
+    "X_rfecv=data_new_rfecv.drop(['Diagnosis'],axis=1)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_rfecv, y_rfecv, test_size=0.2, random_state=42)\n",
+    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "param_grid = {\n",
+    "    'n_estimators': [100, 200, 500],\n",
+    "    'max_depth': [3, 5, 8],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'max_features': [5, 'sqrt', 'log2']\n",
+    "}\n",
+    "search = RandomizedSearchCV(RandomForestClassifier(), param_grid, cv=5) \n",
+    "search.fit(X_train, y_train)\n",
+    "best_model = search.best_estimator_\n",
+    "best_score = search.best_score_\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "8ce4d337",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Accuracy: 0.818889970788705\n",
-      "[[539  65]\n",
-      " [121 302]]\n"
+      "Best Model: RandomForestClassifier(max_depth=5, max_features='log2', n_estimators=200)\n",
+      "Best Score: 0.8294762484774665\n"
      ]
     }
    ],
    "source": [
-    "y_rfecv= data_new_rfecv['Diagnosis']\n",
-    "X_rfecv=data_new_rfecv.drop(['Diagnosis'],axis=1)\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_rfecv, y_rfecv, test_size=0.2, random_state=42)\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
-    "rfc = RandomForestClassifier(n_estimators=100)  \n",
-    "rfc.fit(X_train, y_train)\n",
-    "y_pred = rfc.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(\"Accuracy:\", accuracy)\n",
-    "cm = confusion_matrix(y_test, y_pred)\n",
-    "print(cm)"
+    "print(\"Best Model:\", search.best_estimator_)\n",
+    "print(\"Best Score:\", search.best_score_)\n"
    ]
   },
   {
@@ -1686,7 +1723,7 @@
    "id": "a52ed238",
    "metadata": {},
    "source": [
-    "Applying a random forest classifier on the dataset obtained by using RFECV, we also bobserve the model accuracy and the confusion matrix"
+    "Applying a random forest classifier along with hyperparameter tuning on the dataset obtained by using RFECV, we also observe the model accuracy "
    ]
   },
   {
@@ -1694,16 +1731,24 @@
    "id": "59349ad6",
    "metadata": {},
    "source": [
-    "Since, we observe that the accuracy score of the model trained on datset generated by selectkbest is slightly higher that that trained by dataset generate by rfecv, hence we will use the 5 features selected by selectkbest as they have the most significant relationship with diagnosis"
+    "Thus, we can compare the cross-validated accuracy of two random forest classifier (RFC) models with hyperparameter tuning, each trained on a dataset produced by a different feature selection method (SelectKBest and RFECV)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "id": "82916004",
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acb0a472",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {