diff --git a/Cirrhosis Prediction/Cirrhosis_Prediction.ipynb b/Cirrhosis Prediction/Cirrhosis_Prediction.ipynb
index fae718ff..1673ce5b 100644
--- a/Cirrhosis Prediction/Cirrhosis_Prediction.ipynb
+++ b/Cirrhosis Prediction/Cirrhosis_Prediction.ipynb
@@ -1,40 +1,26 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
"cell_type": "markdown",
- "source": [
- "Cirrhosis, also known as liver cirrhosis or hepatic cirrhosis, and end-stage liver disease, is the impaired liver function caused by the formation of scar tissue known as fibrosis due to damage caused by liver disease."
- ],
"metadata": {
"id": "te3Fb6qv5586"
- }
+ },
+ "source": [
+ "Cirrhosis, also known as liver cirrhosis or hepatic cirrhosis, and end-stage liver disease, is the impaired liver function caused by the formation of scar tissue known as fibrosis due to damage caused by liver disease."
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "# Importing the libraries"
- ],
"metadata": {
"id": "uQkG7QvH592O"
- }
+ },
+ "source": [
+ "# Importing the libraries"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"id": "m8nP5ROx4jRL"
},
@@ -51,18 +37,14 @@
},
{
"cell_type": "code",
- "source": [
- "from google.colab import drive\n",
- "drive.mount('/content/drive')"
- ],
+ "execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aj-hKE-ZvCCM",
- "outputId": "574aef0e-1391-4d10-bd29-bdaedbab9390"
+ "outputId": "ec7090b7-104c-4e02-baa3-4c63af19a160"
},
- "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -71,51 +53,52 @@
"Mounted at /content/drive\n"
]
}
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
]
},
{
"cell_type": "markdown",
- "source": [
- "# Loading the dataset\n"
- ],
"metadata": {
"id": "P4KIBvX86GHa"
- }
+ },
+ "source": [
+ "# Loading the dataset\n"
+ ]
},
{
"cell_type": "code",
- "source": [
- "data=pd.read_csv(\"/content/drive/MyDrive/cirrhosis.csv\")"
- ],
+ "execution_count": 3,
"metadata": {
"id": "hcB25RfJ4qg3"
},
- "execution_count": null,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "data=pd.read_csv(\"/content/drive/MyDrive/cirrhosis.csv\")"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "# Exploring the dataset"
- ],
"metadata": {
"id": "vyGdhcmc6LJu"
- }
+ },
+ "source": [
+ "# Exploring the dataset"
+ ]
},
{
"cell_type": "code",
- "source": [
- "data.head()"
- ],
+ "execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"id": "3oJvA_m140Nj",
- "outputId": "f59e8249-102c-4071-9984-7cd30cb5f9ac"
+ "outputId": "1a66f1c9-2a6d-4d36-8e8a-8e98241e76c7"
},
- "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -144,7 +127,7 @@
],
"text/html": [
"\n",
- "
\n",
+ "
\n",
"
\n",
"\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " N_Days | \n",
+ " Status | \n",
+ " Drug | \n",
+ " Age | \n",
+ " Sex | \n",
+ " Ascites | \n",
+ " Hepatomegaly | \n",
+ " Spiders | \n",
+ " Edema | \n",
+ " Bilirubin | \n",
+ " Cholesterol | \n",
+ " Albumin | \n",
+ " Copper | \n",
+ " Alk_Phos | \n",
+ " SGOT | \n",
+ " Tryglicerides | \n",
+ " Platelets | \n",
+ " Prothrombin | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 400.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 21464.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 14.5 | \n",
+ " 261.0 | \n",
+ " 2.60 | \n",
+ " 156.0 | \n",
+ " 1718.0 | \n",
+ " 137.95 | \n",
+ " 172.0 | \n",
+ " 190.0 | \n",
+ " 12.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4500.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 20617.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.1 | \n",
+ " 302.0 | \n",
+ " 4.14 | \n",
+ " 54.0 | \n",
+ " 7394.8 | \n",
+ " 113.52 | \n",
+ " 88.0 | \n",
+ " 221.0 | \n",
+ " 10.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1012.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 25594.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.4 | \n",
+ " 176.0 | \n",
+ " 3.48 | \n",
+ " 210.0 | \n",
+ " 516.0 | \n",
+ " 96.10 | \n",
+ " 55.0 | \n",
+ " 151.0 | \n",
+ " 12.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1925.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 19994.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.8 | \n",
+ " 244.0 | \n",
+ " 2.54 | \n",
+ " 64.0 | \n",
+ " 6121.8 | \n",
+ " 60.63 | \n",
+ " 92.0 | \n",
+ " 183.0 | \n",
+ " 10.3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1504.0 | \n",
+ " 2.0 | \n",
+ " 1.0 | \n",
+ " 13918.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 3.4 | \n",
+ " 279.0 | \n",
+ " 3.53 | \n",
+ " 143.0 | \n",
+ " 671.0 | \n",
+ " 113.15 | \n",
+ " 72.0 | \n",
+ " 136.0 | \n",
+ " 10.9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "x",
+ "summary": "{\n \"name\": \"x\",\n \"rows\": 418,\n \"fields\": [\n {\n \"column\": \"N_Days\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1104.6729923907321,\n \"min\": 41.0,\n \"max\": 4795.0,\n \"num_unique_values\": 399,\n \"samples\": [\n 2272.0,\n 359.0,\n 3933.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Status\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6086712158248031,\n \"min\": 0.0,\n \"max\": 2.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 1.0,\n 0.0,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Drug\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.4453269190734691,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.0,\n 1.0,\n 0.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3815.8450545514697,\n \"min\": 9598.0,\n \"max\": 28650.0,\n \"num_unique_values\": 344,\n \"samples\": [\n 16300.0,\n 16714.0,\n 13486.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3072599609116883,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.0,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Ascites\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.23910927846550492,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.0,\n 0.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Hepatomegaly\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.45638549920859156,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1.0,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Spiders\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.4090471140296553,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 1.0,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Edema\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5068348607572785,\n \"min\": 0.0,\n \"max\": 2.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 2.0,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Bilirubin\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4.407506384141372,\n \"min\": 0.3,\n \"max\": 28.0,\n \"num_unique_values\": 98,\n \"samples\": [\n 8.5,\n 6.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Cholesterol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 201.8249278609731,\n \"min\": 120.0,\n \"max\": 1775.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 356.2,\n 200.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Albumin\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.4249716057796193,\n \"min\": 1.96,\n \"max\": 4.64,\n \"num_unique_values\": 154,\n \"samples\": [\n 3.66,\n 3.79\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Copper\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 78.20564938803574,\n \"min\": 4.0,\n \"max\": 588.0,\n \"num_unique_values\": 237,\n \"samples\": [\n 76.0,\n 159.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Alk_Phos\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1894.3337428209882,\n \"min\": 289.0,\n \"max\": 13862.4,\n \"num_unique_values\": 392,\n \"samples\": [\n 1303.0,\n 637.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SGOT\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51.41900944653152,\n \"min\": 26.35,\n \"max\": 457.25,\n 
\"num_unique_values\": 267,\n \"samples\": [\n 136.4,\n 79.98\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Tryglicerides\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57.180801173212494,\n \"min\": 33.0,\n \"max\": 598.0,\n \"num_unique_values\": 234,\n \"samples\": [\n 104.0,\n 183.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Platelets\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 97.17113670359227,\n \"min\": 62.0,\n \"max\": 721.0,\n \"num_unique_values\": 251,\n \"samples\": [\n 201.0,\n 204.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Prothrombin\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0199847989880626,\n \"min\": 9.0,\n \"max\": 18.0,\n \"num_unique_values\": 49,\n \"samples\": [\n 11.4,\n 13.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 72
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YJadXEYr1Y0U",
+ "outputId": "75a5d80e-b2a8-4bfa-bd5b-2e13ffdb30fa"
+ },
+ "execution_count": 73,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 4.0\n",
+ "1 3.0\n",
+ "2 4.0\n",
+ "3 4.0\n",
+ "4 3.0\n",
+ "Name: Stage, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 73
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.feature_selection import chi2, SelectKBest\n",
+ "\n",
+ "# 'ID' is only a row identifier, not a clinical measurement; left in, chi2 scores it\n",
+ "# like any other column and it gets selected, so drop it together with the target.\n",
+ "X = data.drop(columns=['ID', 'Stage'])\n",
+ "y = data['Stage'].astype(int)  # cast the target to integer class labels\n",
+ "\n",
+ "# Fit the selector once and read the statistics from it instead of calling chi2 twice.\n",
+ "k = 10  # Number of top features to select\n",
+ "selector = SelectKBest(chi2, k=k)\n",
+ "X_new = selector.fit_transform(X, y)\n",
+ "chi_scores, p_values = selector.scores_, selector.pvalues_\n",
+ "\n",
+ "# Print Chi-square scores and p-values (same order as X.columns)\n",
+ "print(\"Chi-square scores:\", chi_scores)\n",
+ "print(\"P-values:\", p_values)\n",
+ "\n",
+ "# Get the columns of the selected features\n",
+ "selected_features = X.columns[selector.get_support()]\n",
+ "\n",
+ "print(\"Selected features:\", selected_features)"
+ ],
+ "metadata": {
+ "id": "27JtIq_E17DM",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "1fdad51b-9f2f-42b6-bc47-21cc1951c0bd"
+ },
+ "execution_count": 74,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Chi-square scores: [1.05154142e+02 3.69201715e+04 2.57157124e+01 1.86484408e+00\n",
+ " 1.54801920e+04 1.06009810e-01 2.62972885e+01 2.84109826e+01\n",
+ " 1.54259377e+01 3.97200216e+01 1.13264836e+02 1.14405326e+03\n",
+ " 2.46156286e+00 1.74994475e+03 1.10797445e+03 2.30254617e+02\n",
+ " 2.11350654e+02 1.00537170e+03 3.68434869e+00]\n",
+ "P-values: [1.21058183e-022 0.00000000e+000 1.09380866e-005 6.00926799e-001\n",
+ " 0.00000000e+000 9.91106598e-001 8.26368285e-006 2.97760470e-006\n",
+ " 1.48656989e-003 1.22151759e-008 2.17583441e-024 1.00816865e-247\n",
+ " 4.82277600e-001 0.00000000e+000 6.77643772e-240 1.21832568e-049\n",
+ " 1.48686292e-045 1.22982401e-217 2.97627932e-001]\n",
+ "Selected features: Index(['ID', 'N_Days', 'Age', 'Bilirubin', 'Cholesterol', 'Copper', 'Alk_Phos',\n",
+ " 'SGOT', 'Tryglicerides', 'Platelets'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**The 5 most important features according to the Chi-squared test are:**\n",
+ "\n",
+ "N_Days\n",
+ "\n",
+ "Age\n",
+ "\n",
+ "Alk_Phos\n",
+ "\n",
+ "Copper\n",
+ "\n",
+ "Cholesterol\n",
+ "\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "2R3dkOVn26Pa"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "mto1ioAl2dK-"
+ },
+ "execution_count": 74,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 3. Independent Component Analysis (ICA)\n",
+ "\n",
+ "ICA can be used for feature selection by examining the contribution of each original feature to the independent components. Features with high loadings on the most important independent components are considered more relevant."
+ ],
+ "metadata": {
+ "id": "JUS_X3FS3cro"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "from sklearn.decomposition import FastICA\n",
+ "from scipy.stats import kurtosis\n",
+ "import numpy as np\n",
+ "\n",
+ "# Seed FastICA (its initial unmixing matrix is drawn at random) and set `whiten`\n",
+ "# explicitly so reruns give the same components and the FutureWarning disappears.\n",
+ "ica = FastICA(whiten='unit-variance', random_state=42)\n",
+ "S_ = ica.fit_transform(pca_df)  # estimated sources, one column per component\n",
+ "A_ = ica.mixing_  # mixing matrix estimated by the fit\n",
+ "\n",
+ "kurt = [kurtosis(ic) for ic in S_.T]  # kurtosis of each independent component\n",
+ "print(\"Kurtosis of the independent components:\")\n",
+ "print(kurt)\n",
+ "\n",
+ "# Determine the number of components\n",
+ "n_components = S_.shape[1]\n",
+ "# Calculate grid size (rows and columns): a near-square grid that fits every component\n",
+ "n_rows = int(np.ceil(np.sqrt(n_components)))\n",
+ "n_cols = int(np.ceil(n_components / n_rows))\n",
+ "\n",
+ "plt.figure(figsize=(12, 8))  # plotting components\n",
+ "for i in range(n_components):\n",
+ "    plt.subplot(n_rows, n_cols, i + 1)\n",
+ "    plt.plot(S_[:, i])\n",
+ "    plt.title(f\"Independent Component {i + 1}\")\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 0
+ },
+ "id": "gbIHHsab3gI-",
+ "outputId": "a65def3e-8a29-4f2e-9b38-f67d8e3f58ad"
+ },
+ "execution_count": 75,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Kurtosis of the independent components:\n",
+ "[10.783930822912195, -1.6239907975228332, 3.1935288174171674, 13.890220688034333, -0.7013503262509135, -1.654905649509667, 16.370456402289392, -1.0990191898394077, 20.64948385662554, 7.362068430775352, 11.557702085669101, 1.5631381418107049, 0.8677234558575289, 23.701218722430873, 6.0167008852264825, 2.097233596697331, 11.65612370856848, -0.6491953081141393, 4.5122952000566245]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/sklearn/decomposition/_fastica.py:542: FutureWarning: Starting in v1.3, whiten='unit-variance' will be used by default.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "