diff --git a/Metabolic Syndrome Prediction/metabolic_syndrome_predict.ipynb b/Metabolic Syndrome Prediction/metabolic_syndrome_predict.ipynb index 0ffbf113..8bbddf4b 100644 --- a/Metabolic Syndrome Prediction/metabolic_syndrome_predict.ipynb +++ b/Metabolic Syndrome Prediction/metabolic_syndrome_predict.ipynb @@ -2323,7 +2323,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -2345,7 +2345,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -2835,7 +2835,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -3928,7 +3928,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -3938,7 +3938,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -4113,7 +4113,7 @@ "[2401 rows x 7 columns]" ] }, - "execution_count": 62, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -4154,7 +4154,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -4210,7 +4210,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -4282,18 +4282,18 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "No information: defaultdict(, {1: 47, 3: 48, 2: 27, 7: 10, 4: 29})\n", + "No information: defaultdict(, {3: 38, 1: 47, 4: 28, 2: 25, 7: 7})\n", "\n", - "Majority information: defaultdict(, {5: 100, 6: 100, 10: 100, 11: 100, 12: 100, 0: 4, 9: 1})\n", + "Majority information: defaultdict(, {5: 100, 6: 100, 10: 100, 11: 100, 12: 100, 0: 2, 9: 2})\n", "\n", - "Max information: defaultdict(, {5: 28, 
10: 67, 12: 5})\n" + "Max information: defaultdict(, {10: 70, 5: 28, 12: 2})\n" ] } ], @@ -4335,7 +4335,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -4347,7 +4347,7 @@ " dtype='object')" ] }, - "execution_count": 86, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -4364,10 +4364,10 @@ "\n", "after a **Hundred iterations** it was found that the following features show no importance with a score of 0:\n", "\n", - "* *'Income'*\n", - "\n", "* *'Sex'*\n", "\n", + "* *'Income'*\n", + "\n", "* *'Race'*\n", "\n", "* *'Marital'*\n", @@ -4408,6 +4408,896 @@ "\n", "* ***'WaistCirc'***" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metabolic Syndrome Prediction | 4. ML-based Feature Importance\n", + "\n", + "-> XGBoost (eXtreme Gradient Boosting model)\n", + "\n", + "-> Random Forest\n", + "\n", + "-> Decision Tree\n", + "\n", + "-> LGBM (Light Gradient Boosting model)\n", + "\n", + "-> CatBoost (Categorical Boosting model)\n", + "\n", + "-> Extra Trees\n", + "\n", + "-> AdaBoost (Adaptive Boosting)\n", + "\n", + "-> Gradient Boosting model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "x = df.copy().drop(columns=['MetabolicSyndrome'],axis = 1)\n", + "y = df.copy()['MetabolicSyndrome']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x_train: (1680, 13)\n", + "y_train: (1680,)\n", + "x_test: (721, 13)\n", + "y_test: (721,)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)\n", + "\n", + "print(\"x_train:\", x_train.shape)\n", + "print(\"y_train:\", y_train.shape)\n", + 
"print(\"x_test:\", x_test.shape)\n", + "print(\"y_test:\", y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We judge features as follows : Features are important if:\n", + "\n", + "Feature Importance > 5% importance\n", + "\n", + "100+ importance count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. XGBoost (eXtreme Gradient Boosting model)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.9098474341192788\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 5.6163044, 8.819067 , 1.7450701, 2.8455133, 2.0169516,\n", + " 9.7672415, 9.607102 , 2.2360196, 3.8645415, 2.8161535,\n", + " 21.942804 , 7.282089 , 21.441141 ], dtype=float32)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from xgboost import XGBClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "model_params = {\n", + " 'objective': 'binary:logistic',\n", + " 'n_estimators': 100,\n", + " 'learning_rate': 0.1,\n", + " 'max_depth': 3,\n", + " 'subsample': 0.8,\n", + " 'colsample_bytree': 0.8, \n", + " 'enable_categorical' : True\n", + "}\n", + "\n", + "model = XGBClassifier(**model_params)\n", + "\n", + "model.fit(x_train, y_train) # training model\n", + "\n", + "y_pred = model.predict(x_test) # predicting through model\n", + "y_pred = (y_pred > 0.5).astype(int) # converting y_pred to class labels\n", + "\n", + "# Accuracy\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 
'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 'BloodGlucose', 'HDL',\n", + " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **XGBClassifier**: based on percentages (descending order)\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'BMI'**\n", + "\n", + "* **'Sex'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'Age'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Random Forest" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8904299583911235\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 5.4468663 , 2.11115457, 1.63322985, 2.87164018, 1.57061713,\n", + " 14.18829404, 9.70870992, 0.75828731, 5.55706342, 4.2298375 ,\n", + " 19.41284562, 10.07545424, 22.43599991])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "model = RandomForestClassifier()\n", + "\n", + "model.fit(x_train, y_train)\n", + "\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 'BloodGlucose', 'HDL',\n", 
+ " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **RandomForestClassifier**: based on percentages (descending order)\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'BMI'**\n", + "\n", + "* **'Sex'**\n", + "\n", + "* **'Age'**\n", + "\n", + "* **'UrAlbCr'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Decision Tree" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.855755894590846\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 3.20396639, 4.09267422, 2.2220431 , 2.3265535 , 0.71003562,\n", + " 16.63651252, 3.62258047, 0.21575584, 5.71524286, 2.52528682,\n", + " 19.51304344, 7.84702446, 31.36928076])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "model = DecisionTreeClassifier()\n", + "\n", + "model.fit(x_train, y_train)\n", + "\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **DecisionTreeClassifier**: based on percentages (descending order)\n", + "\n", 
+ "* **'Triglycerides'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'UrAlbCr'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. LGBM (Light Gradient Boosting model)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8932038834951457\n" + ] + }, + { + "data": { + "text/plain": [ + "array([280, 97, 53, 161, 67, 419, 329, 6, 310, 229, 319, 369, 361])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import lightgbm as lgb\n", + "\n", + "model_params = {\n", + " 'objective': 'binary',\n", + " 'metric': 'binary_error',\n", + " 'num_leaves': 31,\n", + " 'learning_rate': 0.1,\n", + " 'feature_fraction': 0.8,\n", + " 'bagging_fraction': 0.8,\n", + " 'bagging_freq': 5,\n", + " 'verbose': -1\n", + "}\n", + "\n", + "train_data = lgb.Dataset(x_train, label=y_train)\n", + "test_data = lgb.Dataset(x_test, label=y_test, reference=train_data)\n", + "\n", + "num_round = 100\n", + "bst = lgb.train(model_params, train_data, num_round)\n", + "\n", + "y_pred_proba = bst.predict(x_test)\n", + "y_pred = [1 if pred > 0.5 else 0 for pred in y_pred_proba]\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "bst.feature_importance()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 'BloodGlucose', 'HDL',\n", + " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **LGBM**: based on count (descending order)\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'BMI'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'UrAlbCr'**\n", + "\n", + "* **'Age'**\n", + "\n", + "* **'UricAcid'**\n", + "\n", + "* **'Income'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. CatBoost (Categorical Boosting model)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.9029126213592233\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 4.56710772, 11.62804771, 1.06129061, 1.65206427, 0.7416957 ,\n", + " 15.69751939, 4.08420142, 0.611841 , 2.71786898, 0.71133944,\n", + " 21.06262069, 14.43682955, 21.02757352])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from catboost import CatBoostClassifier\n", + "\n", + "# Define model parameters\n", + "model_params = {\n", + " 'iterations': 100,\n", + " 'learning_rate': 0.1,\n", + " 'depth': 6,\n", + " 'subsample': 0.8,\n", + " 'colsample_bylevel': 0.8,\n", + " 'eval_metric': 'Accuracy',\n", + "}\n", + "\n", + "# Initialize CatBoostClassifier\n", + "model = CatBoostClassifier(**model_params)\n", + "\n", + "# Train the model\n", + "model.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=False)\n", + "\n", + "# Make predictions\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { 
+ "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 'BloodGlucose', 'HDL',\n", + " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **CatBoostClassifier**: based on percentages (descending order)\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'Sex'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Extra Trees" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8613\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 7.85749598, 2.91794336, 3.65928408, 4.67697385, 3.86039051,\n", + " 14.6587498 , 9.72502302, 2.38920523, 5.60023169, 6.15703292,\n", + " 11.83867671, 11.47766221, 15.18133065])" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import ExtraTreesClassifier\n", + "\n", + "model = ExtraTreesClassifier()\n", + "\n", + "model.fit(x_train, y_train)\n", + "\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(f\"Accuracy: {accuracy:.4f}\")\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 
'UricAcid', 'BloodGlucose', 'HDL',\n", + " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **ExtraTreesClassifier**: based on percentages (descending order)\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'BMI'**\n", + "\n", + "* **'Age'**\n", + "\n", + "* **'UricAcid'**\n", + "\n", + "* **'UrAlbCr'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. AdaBoost (Adaptive Boosting)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8779472954230236\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 8., 6., 2., 6., 0., 14., 6., 0., 6., 4., 16., 14., 18.])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import AdaBoostClassifier\n", + "\n", + "# Initialize AdaBoostClassifier\n", + "model = AdaBoostClassifier()\n", + "\n", + "# Train the model\n", + "model.fit(x_train, y_train)\n", + "\n", + "# Make predictions\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 'BloodGlucose', 'HDL',\n", + " 
'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **AdaBoostClassifier**: based on percentages (descending order)\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'WaistCirc'** / **'HDL'** (tied)\n", + "\n", + "* **'Age'**\n", + "\n", + "* **'BMI'**\n", + "\n", + "* **'Sex'**\n", + "\n", + "* **'Income'**\n", + "\n", + "* **'UrAlbCr'**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Gradient Boosting model" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.9070735090152566\n" + ] + }, + { + "data": { + "text/plain": [ + "array([1.93701995e+00, 4.68730697e+00, 3.02995274e-01, 2.28137369e-01,\n", + " 7.02390195e-02, 1.34819874e+01, 6.65235169e+00, 1.36211611e-02,\n", + " 1.79365716e+00, 6.17837269e-01, 2.84970568e+01, 7.88522668e+00,\n", + " 3.38325632e+01])" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.ensemble import GradientBoostingClassifier\n", + "\n", + "model = GradientBoostingClassifier()\n", + "\n", + "model.fit(x_train, y_train)\n", + "\n", + "y_pred = model.predict(x_test)\n", + "y_pred = (y_pred > 0.5).astype(int)\n", + "\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "print(\"Accuracy:\", accuracy)\n", + "\n", + "model.feature_importances_*100" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Age', 'Sex', 'Marital', 'Income', 'Race', 'WaistCirc', 'BMI',\n", + " 'Albuminuria', 'UrAlbCr', 'UricAcid', 
'BloodGlucose', 'HDL',\n", + " 'Triglycerides'],\n", + " dtype='object')" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***Important Features*** based on **GradientBoostingClassifier**: based on percentages (descending order)\n", + "\n", + "* **'Triglycerides'**\n", + "\n", + "* **'BloodGlucose'**\n", + "\n", + "* **'WaistCirc'**\n", + "\n", + "* **'HDL'**\n", + "\n", + "* **'BMI'**" + ] } ], "metadata": {