\n",
+ "
\n",
"
\n",
"\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " age | \n",
- " sex | \n",
- " cp | \n",
- " trestbps | \n",
- " chol | \n",
- " fbs | \n",
- " restecg | \n",
- " thalach | \n",
- " exang | \n",
- " oldpeak | \n",
- " slope | \n",
- " ca | \n",
- " thal | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- " 303.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 54.366337 | \n",
- " 0.683168 | \n",
- " 0.966997 | \n",
- " 131.623762 | \n",
- " 246.264026 | \n",
- " 0.148515 | \n",
- " 0.528053 | \n",
- " 149.646865 | \n",
- " 0.326733 | \n",
- " 1.039604 | \n",
- " 1.399340 | \n",
- " 0.729373 | \n",
- " 2.313531 | \n",
- " 0.544554 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 9.082101 | \n",
- " 0.466011 | \n",
- " 1.032052 | \n",
- " 17.538143 | \n",
- " 51.830751 | \n",
- " 0.356198 | \n",
- " 0.525860 | \n",
- " 22.905161 | \n",
- " 0.469794 | \n",
- " 1.161075 | \n",
- " 0.616226 | \n",
- " 1.022606 | \n",
- " 0.612277 | \n",
- " 0.498835 | \n",
- "
\n",
- " \n",
- " min | \n",
- " 29.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 94.000000 | \n",
- " 126.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 71.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " 47.500000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 120.000000 | \n",
- " 211.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 133.500000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- " 2.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 55.000000 | \n",
- " 1.000000 | \n",
- " 1.000000 | \n",
- " 130.000000 | \n",
- " 240.000000 | \n",
- " 0.000000 | \n",
- " 1.000000 | \n",
- " 153.000000 | \n",
- " 0.000000 | \n",
- " 0.800000 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- " 2.000000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 61.000000 | \n",
- " 1.000000 | \n",
- " 2.000000 | \n",
- " 140.000000 | \n",
- " 274.500000 | \n",
- " 0.000000 | \n",
- " 1.000000 | \n",
- " 166.000000 | \n",
- " 1.000000 | \n",
- " 1.600000 | \n",
- " 2.000000 | \n",
- " 1.000000 | \n",
- " 3.000000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 77.000000 | \n",
- " 1.000000 | \n",
- " 3.000000 | \n",
- " 200.000000 | \n",
- " 564.000000 | \n",
- " 1.000000 | \n",
- " 2.000000 | \n",
- " 202.000000 | \n",
- " 1.000000 | \n",
- " 6.200000 | \n",
- " 2.000000 | \n",
- " 4.000000 | \n",
- " 3.000000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
\n",
- "
\n"
- ],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "dataframe",
- "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92.63263171018461,\n \"min\": 9.082100989837857,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 54.366336633663366,\n 55.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.91793021099774,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.6831683168316832,\n 1.0,\n 0.46601082333962385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72725528212327,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 0.966996699669967,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.65195263865039,\n \"min\": 17.5381428135171,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 131.62376237623764,\n 130.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35806568851743,\n \"min\": 51.83075098793003,\n \"max\": 564.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 246.26402640264027,\n 240.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.0512286741478,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.1485148514851485,\n 1.0,\n 0.35619787492797644\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.8733588009897,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.528052805280528,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.70384393886218,\n \"min\": 22.905161114914094,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 149.64686468646866,\n 153.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.9862394088184,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.32673267326732675,\n 1.0,\n 0.4697944645223165\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.59952466080658,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 1.0396039603960396,\n 1.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72394469173834,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 1.3993399339933994,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.79372080487734,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.7293729372937293,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.47909774814387,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 2.3135313531353137,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.92326354929804,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.5445544554455446,\n 1.0,\n 0.4988347841643913\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
- }
- },
- "metadata": {},
- "execution_count": 9
- }
- ]
+ "execution_count": 219,
+ "outputs": []
},
{
"cell_type": "code",
"source": [
"# checking the distribution of Target Variable\n",
- "heart_data['target'].value_counts()"
+ "heart_data['Heart Attack Risk'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NCbxYqqNf2-4",
- "outputId": "9ec352f8-5bd5-4d8e-ae54-68962baa0851"
+ "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0"
},
- "execution_count": null,
+ "execution_count": 220,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
- "1 165\n",
- "0 138\n",
- "Name: target, dtype: int64"
+ "Heart Attack Risk\n",
+ "0 5624\n",
+ "1 3139\n",
+ "Name: count, dtype: int64"
]
},
"metadata": {},
- "execution_count": 10
+ "execution_count": 220
}
]
},
{
"cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "DvvKtsuILgK1"
+ }
+ },
+ {
+ "cell_type": "code",
"source": [
- "1-->Defective heart\n",
- "\n",
- "0-->Healthy heart\n",
- "\n",
- "\n"
+ "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])"
],
"metadata": {
- "id": "qWNMUL5_CrfC"
- }
+ "id": "mfiZ3MDvIiaV"
+ },
+ "execution_count": 221,
+ "outputs": []
},
{
"cell_type": "markdown",
"source": [
+ "1-->Defective heart\n",
"\n",
- "Splitting the features and target"
+ "0-->Healthy heart\n",
+ "\n",
+ "\n"
],
"metadata": {
- "id": "McSs_6cTC8Ub"
+ "id": "qWNMUL5_CrfC"
}
},
{
"cell_type": "code",
"source": [
- "x=heart_data.drop(columns='target', axis=1)\n",
- "y=heart_data['target']"
+ "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n",
+ "y=heart_data_num['Heart Attack Risk']\n"
],
"metadata": {
"id": "oSgKSF5-DGVk"
},
- "execution_count": null,
+ "execution_count": 222,
"outputs": []
},
{
@@ -1467,41 +1194,80 @@
"base_uri": "https://localhost:8080/"
},
"id": "zhiIhyMxDhWF",
- "outputId": "caf4204e-9079-4a26-a933-b9c9f93f6dd9"
+ "outputId": "5684dc44-c814-4d81-e438-ecc544010d10"
},
- "execution_count": null,
+ "execution_count": 223,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
- "0 63 1 3 145 233 1 0 150 0 2.3 \n",
- "1 37 1 2 130 250 0 1 187 0 3.5 \n",
- "2 41 0 1 130 204 0 0 172 0 1.4 \n",
- "3 56 1 1 120 236 0 1 178 0 0.8 \n",
- "4 57 0 0 120 354 0 1 163 1 0.6 \n",
- ".. ... ... .. ... ... ... ... ... ... ... \n",
- "298 57 0 0 140 241 0 1 123 1 0.2 \n",
- "299 45 1 3 110 264 0 1 132 0 1.2 \n",
- "300 68 1 0 144 193 1 1 141 0 3.4 \n",
- "301 57 1 0 130 131 0 1 115 1 1.2 \n",
- "302 57 0 1 130 236 0 0 174 0 0.0 \n",
+ " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n",
+ "0 67 208 72 0 0 1 \n",
+ "1 21 389 98 1 1 1 \n",
+ "2 21 324 72 1 0 0 \n",
+ "3 84 383 73 1 1 1 \n",
+ "4 66 318 93 1 1 1 \n",
+ "... ... ... ... ... ... ... \n",
+ "8758 60 121 61 1 1 1 \n",
+ "8759 28 120 73 1 0 0 \n",
+ "8760 47 250 105 0 1 1 \n",
+ "8761 36 178 60 1 0 1 \n",
+ "8762 25 356 75 1 1 0 \n",
+ "\n",
+ " Obesity Alcohol Consumption Exercise Hours Per Week \\\n",
+ "0 0 0 4.168189 \n",
+ "1 1 1 1.813242 \n",
+ "2 0 0 2.078353 \n",
+ "3 0 1 9.828130 \n",
+ "4 1 0 5.804299 \n",
+ "... ... ... ... \n",
+ "8758 0 1 7.917342 \n",
+ "8759 1 0 16.558426 \n",
+ "8760 1 1 3.148438 \n",
+ "8761 0 0 3.789950 \n",
+ "8762 0 1 18.081748 \n",
+ "\n",
+ " Previous Heart Problems Medication Use Stress Level \\\n",
+ "0 0 0 9 \n",
+ "1 1 0 1 \n",
+ "2 1 1 9 \n",
+ "3 1 0 9 \n",
+ "4 1 0 6 \n",
+ "... ... ... ... \n",
+ "8758 1 1 8 \n",
+ "8759 0 0 8 \n",
+ "8760 1 0 5 \n",
+ "8761 1 1 5 \n",
+ "8762 0 0 8 \n",
+ "\n",
+ " Sedentary Hours Per Day Income BMI Triglycerides \\\n",
+ "0 6.615001 261404 31.251233 286 \n",
+ "1 4.963459 285768 27.194973 235 \n",
+ "2 9.463426 235282 28.176571 587 \n",
+ "3 7.648981 125640 36.464704 378 \n",
+ "4 1.514821 160555 21.809144 231 \n",
+ "... ... ... ... ... \n",
+ "8758 10.806373 235420 19.655895 67 \n",
+ "8759 3.833038 217881 23.993866 617 \n",
+ "8760 2.375214 36998 35.406146 527 \n",
+ "8761 0.029104 209943 27.294020 114 \n",
+ "8762 9.005234 247338 32.914151 180 \n",
"\n",
- " slope ca thal \n",
- "0 0 0 1 \n",
- "1 0 0 2 \n",
- "2 2 0 2 \n",
- "3 2 0 2 \n",
- "4 2 0 2 \n",
- ".. ... .. ... \n",
- "298 1 0 3 \n",
- "299 1 0 3 \n",
- "300 1 2 3 \n",
- "301 1 1 3 \n",
- "302 1 1 2 \n",
+ " Physical Activity Days Per Week Sleep Hours Per Day \n",
+ "0 0 6 \n",
+ "1 1 7 \n",
+ "2 4 4 \n",
+ "3 3 4 \n",
+ "4 1 5 \n",
+ "... ... ... \n",
+ "8758 7 7 \n",
+ "8759 4 9 \n",
+ "8760 4 4 \n",
+ "8761 2 8 \n",
+ "8762 7 4 \n",
"\n",
- "[303 rows x 13 columns]\n"
+ "[8763 rows x 18 columns]\n"
]
}
]
@@ -1516,26 +1282,26 @@
"base_uri": "https://localhost:8080/"
},
"id": "VpOvdXWgHWmI",
- "outputId": "113ab45d-b6a3-45a1-912f-54a86cf21df2"
+ "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4"
},
- "execution_count": null,
+ "execution_count": 224,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "0 1\n",
- "1 1\n",
- "2 1\n",
- "3 1\n",
- "4 1\n",
- " ..\n",
- "298 0\n",
- "299 0\n",
- "300 0\n",
- "301 0\n",
- "302 0\n",
- "Name: target, Length: 303, dtype: int64\n"
+ "0 0\n",
+ "1 0\n",
+ "2 0\n",
+ "3 0\n",
+ "4 0\n",
+ " ..\n",
+ "8758 0\n",
+ "8759 0\n",
+ "8760 1\n",
+ "8761 0\n",
+ "8762 1\n",
+ "Name: Heart Attack Risk, Length: 8763, dtype: int64\n"
]
}
]
@@ -1552,33 +1318,67 @@
{
"cell_type": "code",
"source": [
+ "\n",
"x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)"
],
"metadata": {
"id": "SNK4hm8DIPSm"
},
- "execution_count": null,
+ "execution_count": 225,
"outputs": []
},
+ {
+ "source": [
+ "# Check the number of samples in x and y\n",
+ "print(f\"Number of samples in x: {len(x)}\")\n",
+ "print(f\"Number of samples in y: {len(y)}\")\n",
+ "\n",
+ "# If the number of samples is different, raise an error\n",
+ "if len(x) != len(y):\n",
+ " raise ValueError(\"Input arrays have different number of samples.\")\n",
+ "\n",
+ "# Proceed with train_test_split\n",
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)"
+ ],
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "GyZ6mljEHuVk",
+ "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f"
+ },
+ "execution_count": 226,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Number of samples in x: 8763\n",
+ "Number of samples in y: 8763\n"
+ ]
+ }
+ ]
+ },
{
"cell_type": "code",
"source": [
"print(x.shape,x_train.shape,x_test.shape)"
],
"metadata": {
+ "id": "7OTKtdA-JLCV",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "7OTKtdA-JLCV",
- "outputId": "cb0304a8-2605-4f7e-8510-7b23a0fbd4dc"
+ "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287"
},
- "execution_count": null,
+ "execution_count": 227,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "(303, 13) (212, 13) (91, 13)\n"
+ "(8763, 18) (7010, 18) (1753, 18)\n"
]
}
]
@@ -1601,47 +1401,42 @@
"id": "AUEblGtLJlzD"
}
},
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "OVQDdrIpHm8P"
+ },
+ "execution_count": 227,
+ "outputs": []
+ },
{
"cell_type": "code",
"source": [
- "model=LogisticRegression()"
+ "model1=LogisticRegression()"
],
"metadata": {
- "id": "k-IIz1pzJtRd"
+ "id": "w0CnNIPkHnTT"
},
- "execution_count": null,
+ "execution_count": 228,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Training the logistic regression model with training data\n",
- "model.fit(x_train,y_train)"
+ "model1.fit(x_train,y_train)"
],
"metadata": {
+ "id": "kr84EwGwHqGY",
"colab": {
"base_uri": "https://localhost:8080/",
- "height": 233
+ "height": 74
},
- "id": "kqFKrLzlJ0N0",
- "outputId": "3bb02431-b194-4619-a85f-8fbe87e779e6"
+ "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593"
},
- "execution_count": null,
+ "execution_count": 229,
"outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
- "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
- "\n",
- "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
- " https://scikit-learn.org/stable/modules/preprocessing.html\n",
- "Please also refer to the documentation for alternative solver options:\n",
- " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
- " n_iter_i = _check_optimize_result(\n"
- ]
- },
{
"output_type": "execute_result",
"data": {
@@ -1649,11 +1444,11 @@
"LogisticRegression()"
],
"text/html": [
- "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org."
+ "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org."
]
},
"metadata": {},
- "execution_count": 17
+ "execution_count": 229
}
]
},
@@ -1668,17 +1463,24 @@
"id": "aPahD6MLKaPU"
}
},
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "FE92kQzZHIIl"
+ }
+ },
{
"cell_type": "code",
"source": [
"#accuracy on training data\n",
- "x_train_prediction=model.predict(x_train)\n",
- "training_data_accuracy=accuracy_score(x_train_prediction,y_train)"
+ "x_train_prediction = model1.predict(x_train)\n",
+ "training_data_accuracy = accuracy_score(x_train_prediction, y_train)"
],
"metadata": {
"id": "NHy61zdJKDR1"
},
- "execution_count": null,
+ "execution_count": 230,
"outputs": []
},
{
@@ -1687,19 +1489,19 @@
"print('Accuracy on Training data:',training_data_accuracy)"
],
"metadata": {
+ "id": "J4XiNRwXLCXf",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "J4XiNRwXLCXf",
- "outputId": "dd55ea56-948e-4bc3-b98d-273431014230"
+ "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1"
},
- "execution_count": null,
+ "execution_count": 231,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Accuracy on Training data: 0.8679245283018868\n"
+ "Accuracy on Training data: 0.6417974322396577\n"
]
}
]
@@ -1708,13 +1510,13 @@
"cell_type": "code",
"source": [
"#accuracy on test data\n",
- "x_test_prediction=model.predict(x_test)\n",
+ "x_test_prediction=model1.predict(x_test)\n",
"testing_data_accuracy=accuracy_score(x_test_prediction,y_test)"
],
"metadata": {
"id": "ehbFgWjhLK44"
},
- "execution_count": null,
+ "execution_count": 232,
"outputs": []
},
{
@@ -1723,19 +1525,19 @@
"print('Accuracy on Testing data:',testing_data_accuracy)"
],
"metadata": {
+ "id": "jYZIcbiVLs0G",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "jYZIcbiVLs0G",
- "outputId": "e7a36667-528e-42d1-e338-eeadc11c4947"
+ "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0"
},
- "execution_count": null,
+ "execution_count": 233,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
- "Accuracy on Testing data: 0.8021978021978022\n"
+ "Accuracy on Testing data: 0.6417569880205363\n"
]
}
]
@@ -1752,12 +1554,12 @@
{
"cell_type": "code",
"source": [
- "input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3)\n",
+ "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n",
"# change the input data into numpy array\n",
"input_data_as_numpy_array=np.asarray(input_data)\n",
"#reshape the numpy array as we are predicting for only on instance\n",
"input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n",
- "prediction=model.predict(input_data_reshaped)\n",
+ "prediction=model1.predict(input_data_reshaped)\n",
"print(prediction)\n",
"if (prediction[0]==0):\n",
" print(\"The person does not have heart disease\")\n",
@@ -1766,13 +1568,13 @@
"\n"
],
"metadata": {
+ "id": "Ky2mzQUgL9IU",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "Ky2mzQUgL9IU",
- "outputId": "7301a0b0-8bb7-4dff-bee4-a7f26dd4b00c"
+ "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f"
},
- "execution_count": null,
+ "execution_count": 234,
"outputs": [
{
"output_type": "stream",
@@ -1791,48 +1593,6 @@
]
}
]
- },
- {
- "cell_type": "code",
- "source": [
- "input_data=(65\t,1,\t0\t,120\t,177\t,0,\t1\t,140,\t0,\t0.4,\t2,\t0,\t3)\n",
- "# change the input data into numpy array\n",
- "input_data_as_numpy_array=np.asarray(input_data)\n",
- "#reshape the numpy array as we are predicting for only on instance\n",
- "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n",
- "prediction=model.predict(input_data_reshaped)\n",
- "print(prediction)\n",
- "if (prediction[0]==0):\n",
- " print(\"The person does not have heart disease\")\n",
- "else:\n",
- " print(\"the person has heart disease\")"
- ],
- "metadata": {
- "id": "WCbZkDR7PCyB",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "d7158150-6fe9-4271-833f-d16e78d435c6"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "[1]\n",
- "the person has heart disease\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
- " warnings.warn(\n"
- ]
- }
- ]
}
]
}
\ No newline at end of file
From 0fc87b502c2eb7ce0ee7725860e02dd7d0be3954 Mon Sep 17 00:00:00 2001
From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com>
Date: Mon, 13 May 2024 10:23:11 +0530
Subject: [PATCH 06/12] Created using Colab
From 4c291235af36c3dabb19532cba550dd6114e6709 Mon Sep 17 00:00:00 2001
From: Sudiksha18
Date: Mon, 13 May 2024 10:30:42 +0530
Subject: [PATCH 07/12] Heart_Disease_Prediction
---
Heart_Disease_Prediction.ipynb | 614 ++++++++++++++++-----------------
1 file changed, 307 insertions(+), 307 deletions(-)
diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb
index 2c9128e5..c506c25d 100644
--- a/Heart_Disease_Prediction.ipynb
+++ b/Heart_Disease_Prediction.ipynb
@@ -1,26 +1,10 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- }
- },
"cells": [
{
"cell_type": "markdown",
"metadata": {
- "id": "view-in-github",
- "colab_type": "text"
+ "colab_type": "text",
+ "id": "view-in-github"
},
"source": [
"
"
@@ -28,15 +12,20 @@
},
{
"cell_type": "markdown",
- "source": [
- "Importing the Dependencies\n"
- ],
"metadata": {
"id": "Cj2SOXgaZt-Q"
- }
+ },
+ "source": [
+ "Importing the Dependencies\n"
+ ]
},
{
"cell_type": "code",
+ "execution_count": 212,
+ "metadata": {
+ "id": "k850UGz1Z03B"
+ },
+ "outputs": [],
"source": [
"# @title\n",
"import numpy as np\n",
@@ -44,40 +33,32 @@
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score"
- ],
- "metadata": {
- "id": "k850UGz1Z03B"
- },
- "execution_count": 212,
- "outputs": []
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "Data Collection and Processing\n"
- ],
"metadata": {
"id": "APYsimt8bDoD"
- }
+ },
+ "source": [
+ "Data Collection and Processing\n"
+ ]
},
{
"cell_type": "code",
- "source": [
- "#loading the csv data to a Pandas DataFrame\n",
- "heart_data= pd.read_csv('/content/heart.csv')"
- ],
+ "execution_count": 213,
"metadata": {
"id": "RJg3aA91Z0-u"
},
- "execution_count": 213,
- "outputs": []
+ "outputs": [],
+ "source": [
+ "#loading the csv data to a Pandas DataFrame\n",
+ "heart_data= pd.read_csv('/content/heart.csv')"
+ ]
},
{
"cell_type": "code",
- "source": [
- "#print first 5 rows of the datase\n",
- "heart_data.head()\n"
- ],
+ "execution_count": 214,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -86,49 +67,13 @@
"id": "BnoQ8u4hdZ8Z",
"outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7"
},
- "execution_count": 214,
"outputs": [
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n",
- "0 BMW7812 67 Male 208 158/88 72 0 \n",
- "1 CZE1114 21 Male 389 165/93 98 1 \n",
- "2 BNI9906 21 Female 324 174/99 72 1 \n",
- "3 JLN3497 84 Male 383 163/100 73 1 \n",
- "4 GFO8847 66 Male 318 91/88 93 1 \n",
- "\n",
- " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n",
- "0 0 1 0 ... 6.615001 261404 \n",
- "1 1 1 1 ... 4.963459 285768 \n",
- "2 0 0 0 ... 9.463426 235282 \n",
- "3 1 1 0 ... 7.648981 125640 \n",
- "4 1 1 1 ... 1.514821 160555 \n",
- "\n",
- " BMI Triglycerides Physical Activity Days Per Week \\\n",
- "0 31.251233 286 0 \n",
- "1 27.194973 235 1 \n",
- "2 28.176571 587 4 \n",
- "3 36.464704 378 3 \n",
- "4 21.809144 231 1 \n",
- "\n",
- " Sleep Hours Per Day Country Continent Hemisphere \\\n",
- "0 6 Argentina South America Southern Hemisphere \n",
- "1 7 Canada North America Northern Hemisphere \n",
- "2 4 France Europe Northern Hemisphere \n",
- "3 4 Canada North America Northern Hemisphere \n",
- "4 5 Thailand Asia Northern Hemisphere \n",
- "\n",
- " Heart Attack Risk \n",
- "0 0 \n",
- "1 0 \n",
- "2 0 \n",
- "3 0 \n",
- "4 0 \n",
- "\n",
- "[5 rows x 26 columns]"
- ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "heart_data"
+ },
"text/html": [
"\n",
" \n",
@@ -508,21 +453,58 @@
"
\n",
" \n"
],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "dataframe",
- "variable_name": "heart_data"
- }
+ "text/plain": [
+ " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n",
+ "0 BMW7812 67 Male 208 158/88 72 0 \n",
+ "1 CZE1114 21 Male 389 165/93 98 1 \n",
+ "2 BNI9906 21 Female 324 174/99 72 1 \n",
+ "3 JLN3497 84 Male 383 163/100 73 1 \n",
+ "4 GFO8847 66 Male 318 91/88 93 1 \n",
+ "\n",
+ " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n",
+ "0 0 1 0 ... 6.615001 261404 \n",
+ "1 1 1 1 ... 4.963459 285768 \n",
+ "2 0 0 0 ... 9.463426 235282 \n",
+ "3 1 1 0 ... 7.648981 125640 \n",
+ "4 1 1 1 ... 1.514821 160555 \n",
+ "\n",
+ " BMI Triglycerides Physical Activity Days Per Week \\\n",
+ "0 31.251233 286 0 \n",
+ "1 27.194973 235 1 \n",
+ "2 28.176571 587 4 \n",
+ "3 36.464704 378 3 \n",
+ "4 21.809144 231 1 \n",
+ "\n",
+ " Sleep Hours Per Day Country Continent Hemisphere \\\n",
+ "0 6 Argentina South America Southern Hemisphere \n",
+ "1 7 Canada North America Northern Hemisphere \n",
+ "2 4 France Europe Northern Hemisphere \n",
+ "3 4 Canada North America Northern Hemisphere \n",
+ "4 5 Thailand Asia Northern Hemisphere \n",
+ "\n",
+ " Heart Attack Risk \n",
+ "0 0 \n",
+ "1 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "4 0 \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
},
+ "execution_count": 214,
"metadata": {},
- "execution_count": 214
+ "output_type": "execute_result"
}
+ ],
+ "source": [
+ "#print first 5 rows of the datase\n",
+ "heart_data.head()\n"
]
},
{
"cell_type": "code",
- "source": [
- "heart_data.tail()\n"
- ],
+ "execution_count": 215,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -531,49 +513,12 @@
"id": "NQwDjwwGeBF4",
"outputId": "60d69d34-5c6e-4975-c633-13cc786065f6"
},
- "execution_count": 215,
"outputs": [
{
- "output_type": "execute_result",
"data": {
- "text/plain": [
- " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n",
- "8758 MSV9918 60 Male 121 94/76 61 \n",
- "8759 QSV6764 28 Female 120 157/102 73 \n",
- "8760 XKA5925 47 Male 250 161/75 105 \n",
- "8761 EPE6801 36 Male 178 119/67 60 \n",
- "8762 ZWN9666 25 Female 356 138/67 75 \n",
- "\n",
- " Diabetes Family History Smoking Obesity ... \\\n",
- "8758 1 1 1 0 ... \n",
- "8759 1 0 0 1 ... \n",
- "8760 0 1 1 1 ... \n",
- "8761 1 0 1 0 ... \n",
- "8762 1 1 0 0 ... \n",
- "\n",
- " Sedentary Hours Per Day Income BMI Triglycerides \\\n",
- "8758 10.806373 235420 19.655895 67 \n",
- "8759 3.833038 217881 23.993866 617 \n",
- "8760 2.375214 36998 35.406146 527 \n",
- "8761 0.029104 209943 27.294020 114 \n",
- "8762 9.005234 247338 32.914151 180 \n",
- "\n",
- " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n",
- "8758 7 7 Thailand \n",
- "8759 4 9 Canada \n",
- "8760 4 4 Brazil \n",
- "8761 2 8 Brazil \n",
- "8762 7 4 United Kingdom \n",
- "\n",
- " Continent Hemisphere Heart Attack Risk \n",
- "8758 Asia Northern Hemisphere 0 \n",
- "8759 North America Northern Hemisphere 0 \n",
- "8760 South America Southern Hemisphere 1 \n",
- "8761 South America Southern Hemisphere 0 \n",
- "8762 Europe Northern Hemisphere 1 \n",
- "\n",
- "[5 rows x 26 columns]"
- ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe"
+ },
"text/html": [
"\n",
"