From 2414e155ecafa5d74faf3cb72f02c2506318a569 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Sun, 12 May 2024 17:32:23 +0530 Subject: [PATCH 01/12] Created using Colab --- Heart_Disease_Prediction.ipynb | 1836 ++++++++++++++++++++++++++++++++ 1 file changed, 1836 insertions(+) create mode 100644 Heart_Disease_Prediction.ipynb diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb new file mode 100644 index 00000000..f5d64999 --- /dev/null +++ b/Heart_Disease_Prediction.ipynb @@ -0,0 +1,1836 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyMjJQcXr2muZKXGUdpyVh/h", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Importing the Dependencies\n" + ], + "metadata": { + "id": "Cj2SOXgaZt-Q" + } + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ], + "metadata": { + "id": "k850UGz1Z03B" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Data Collection and Processing\n" + ], + "metadata": { + "id": "APYsimt8bDoD" + } + }, + { + "cell_type": "code", + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart_disease_data.csv')" + ], + "metadata": { + "id": "RJg3aA91Z0-u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "BnoQ8u4hdZ8Z", + "outputId": "30199964-f01f-4d2c-9a9d-93c7c7ae6d72" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", + "\n", + " ca thal target \n", + "0 0 1 1 \n", + "1 0 2 1 \n", + "2 0 2 1 \n", + "3 0 2 1 \n", + "4 0 2 1 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 303,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9,\n \"min\": 29,\n \"max\": 77,\n \"num_unique_values\": 41,\n \"samples\": [\n 46,\n 66,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 94,\n \"max\": 200,\n \"num_unique_values\": 49,\n \"samples\": [\n 104,\n 123\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 126,\n \"max\": 564,\n \"num_unique_values\": 152,\n \"samples\": [\n 277,\n 169\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 71,\n \"max\": 202,\n \"num_unique_values\": 91,\n \"samples\": [\n 159,\n 152\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1610750220686348,\n \"min\": 0.0,\n \"max\": 6.2,\n \"num_unique_values\": 40,\n \"samples\": [\n 1.9,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 4,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "heart_data.tail()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "NQwDjwwGeBF4", + "outputId": "0ad96c3c-0609-4365-b667-16204c4087e4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal target \n", + "298 1 0 3 0 \n", + "299 1 0 3 0 \n", + "300 1 2 3 0 \n", + "301 1 1 3 0 \n", + "302 1 1 2 0 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8,\n \"min\": 45,\n \"max\": 68,\n \"num_unique_values\": 3,\n \"samples\": [\n 57,\n 45,\n 68\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 110,\n \"max\": 144,\n \"num_unique_values\": 4,\n \"samples\": [\n 110,\n 130\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52,\n \"min\": 131,\n \"max\": 264,\n \"num_unique_values\": 5,\n \"samples\": [\n 264,\n 236\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 115,\n \"max\": 174,\n \"num_unique_values\": 5,\n \"samples\": [\n 132,\n 174\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3490737563232043,\n \"min\": 0.0,\n \"max\": 3.4,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.2,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2,\n \"max\": 3,\n \"num_unique_values\": 2,\n \"samples\": [\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ye8LlTQVeHs1", + "outputId": "2ab0e7a8-c73f-4ad4-8e0f-fa3a4c18534e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(303, 14)" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# getting some info about the data\n", + "heart_data.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5YZLMOwFeXF3", + "outputId": "3267941d-8cae-4d36-c703-938e84bdb0f5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 303 entries, 0 to 302\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 age 303 non-null int64 \n", + " 1 sex 303 non-null int64 \n", + " 2 cp 303 non-null int64 \n", + " 3 trestbps 303 non-null int64 \n", + " 4 chol 303 non-null int64 \n", + " 5 fbs 303 non-null int64 \n", + " 6 restecg 303 non-null int64 \n", + " 7 thalach 303 non-null int64 \n", + " 8 exang 303 non-null int64 \n", + " 9 oldpeak 303 non-null float64\n", + " 10 slope 303 non-null int64 \n", + " 11 ca 303 non-null int64 \n", + " 12 thal 303 non-null int64 \n", + " 13 target 303 non-null int64 \n", + "dtypes: float64(1), int64(13)\n", + "memory usage: 33.3 KB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QHazm2rze6Oj", + "outputId": "26e2d065-8f01-4306-c293-96e986b8a836" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "age 0\n", + "sex 0\n", + "cp 0\n", + "trestbps 0\n", + "chol 0\n", + "fbs 0\n", + "restecg 0\n", + "thalach 0\n", + "exang 0\n", + "oldpeak 0\n", + "slope 0\n", + "ca 0\n", + "thal 0\n", + "target 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# statistical measures about the data\n", + "heart_data.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 320 + }, + "id": "nt15bvuYfBcA", + "outputId": "0d4fa9d8-d94f-4706-f640-416c9d3678fd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", + "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", + "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", + "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", + "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", + "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", + "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", + "\n", + " restecg thalach exang oldpeak slope ca \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", + "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", + "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n", + "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", + "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", + "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", + "\n", + " thal target \n", + "count 303.000000 303.000000 \n", + "mean 2.313531 0.544554 \n", + "std 0.612277 0.498835 \n", + "min 0.000000 0.000000 \n", + "25% 2.000000 0.000000 \n", + "50% 2.000000 1.000000 \n", + "75% 3.000000 1.000000 \n", + "max 3.000000 1.000000 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
count303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000
mean54.3663370.6831680.966997131.623762246.2640260.1485150.528053149.6468650.3267331.0396041.3993400.7293732.3135310.544554
std9.0821010.4660111.03205217.53814351.8307510.3561980.52586022.9051610.4697941.1610750.6162261.0226060.6122770.498835
min29.0000000.0000000.00000094.000000126.0000000.0000000.00000071.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%47.5000000.0000000.000000120.000000211.0000000.0000000.000000133.5000000.0000000.0000001.0000000.0000002.0000000.000000
50%55.0000001.0000001.000000130.000000240.0000000.0000001.000000153.0000000.0000000.8000001.0000000.0000002.0000001.000000
75%61.0000001.0000002.000000140.000000274.5000000.0000001.000000166.0000001.0000001.6000002.0000001.0000003.0000001.000000
max77.0000001.0000003.000000200.000000564.0000001.0000002.000000202.0000001.0000006.2000002.0000004.0000003.0000001.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92.63263171018461,\n \"min\": 9.082100989837857,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 54.366336633663366,\n 55.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.91793021099774,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.6831683168316832,\n 1.0,\n 0.46601082333962385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72725528212327,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 0.966996699669967,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.65195263865039,\n \"min\": 17.5381428135171,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 131.62376237623764,\n 130.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35806568851743,\n \"min\": 51.83075098793003,\n \"max\": 564.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 246.26402640264027,\n 240.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.0512286741478,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.1485148514851485,\n 1.0,\n 0.35619787492797644\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.8733588009897,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.528052805280528,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.70384393886218,\n \"min\": 22.905161114914094,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 149.64686468646866,\n 153.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.9862394088184,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.32673267326732675,\n 1.0,\n 0.4697944645223165\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.59952466080658,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 1.0396039603960396,\n 1.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72394469173834,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 1.3993399339933994,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.79372080487734,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.7293729372937293,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.47909774814387,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 2.3135313531353137,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.92326354929804,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.5445544554455446,\n 1.0,\n 0.4988347841643913\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['target'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NCbxYqqNf2-4", + "outputId": "9ec352f8-5bd5-4d8e-ae54-68962baa0851" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1 165\n", + "0 138\n", + "Name: target, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "1-->Defective heart\n", + "\n", + "0-->Healthy heart\n", + "\n", + "\n" + ], + "metadata": { + "id": "qWNMUL5_CrfC" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "Splitting the features and target" + ], + "metadata": { + "id": "McSs_6cTC8Ub" + } + }, + { + "cell_type": "code", + "source": [ + "x=heart_data.drop(columns='target', axis=1)\n", + "y=heart_data['target']" + ], + "metadata": { + "id": "oSgKSF5-DGVk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zhiIhyMxDhWF", + "outputId": "caf4204e-9079-4a26-a933-b9c9f93f6dd9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 \n", + ".. ... ... .. ... ... ... ... ... ... ... \n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal \n", + "0 0 0 1 \n", + "1 0 0 2 \n", + "2 2 0 2 \n", + "3 2 0 2 \n", + "4 2 0 2 \n", + ".. ... .. ... \n", + "298 1 0 3 \n", + "299 1 0 3 \n", + "300 1 2 3 \n", + "301 1 1 3 \n", + "302 1 1 2 \n", + "\n", + "[303 rows x 13 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VpOvdXWgHWmI", + "outputId": "113ab45d-b6a3-45a1-912f-54a86cf21df2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + " ..\n", + "298 0\n", + "299 0\n", + "300 0\n", + "301 0\n", + "302 0\n", + "Name: target, Length: 303, dtype: int64\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Splitting data into Training data" + ], + "metadata": { + "id": "5A20XMHYII3T" + } + }, + { + "cell_type": "code", + "source": [ + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ], + "metadata": { + "id": "SNK4hm8DIPSm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7OTKtdA-JLCV", + "outputId": "cb0304a8-2605-4f7e-8510-7b23a0fbd4dc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(303, 13) (212, 13) (91, 13)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "MODEL TRAINING" + ], + "metadata": { + "id": "ne2RibQaJdNe" + } + }, + { + "cell_type": "markdown", + "source": [ + "LOGISTIC REGRESSION" + ], + "metadata": { + "id": "AUEblGtLJlzD" + } + }, + { + "cell_type": "code", + "source": [ + "model=LogisticRegression()" + ], + "metadata": { + "id": "k-IIz1pzJtRd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training the logistic regression model with training data\n", + "model.fit(x_train,y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 233 + }, + "id": "kqFKrLzlJ0N0", + "outputId": "3bb02431-b194-4619-a85f-8fbe87e779e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression()" + ], + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Model Evaluation\n", + "\n", + "Accuracy Score" + ], + "metadata": { + "id": "aPahD6MLKaPU" + } + }, + { + "cell_type": "code", + "source": [ + "#accuracy on training data\n", + "x_train_prediction=model.predict(x_train)\n", + "training_data_accuracy=accuracy_score(x_train_prediction,y_train)" + ], + "metadata": { + "id": "NHy61zdJKDR1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4XiNRwXLCXf", + "outputId": "dd55ea56-948e-4bc3-b98d-273431014230" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Training data: 0.8679245283018868\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#accuracy on test data\n", + "x_test_prediction=model.predict(x_test)\n", + "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" + ], + "metadata": { + "id": "ehbFgWjhLK44" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jYZIcbiVLs0G", + "outputId": "e7a36667-528e-42d1-e338-eeadc11c4947" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Testing data: 0.8021978021978022\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "BUILDING PREDICTING SYSTEM" + ], + "metadata": { + "id": "rec6Gz8vMP_G" + } + }, + { + "cell_type": "code", + "source": [ + "input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ky2mzQUgL9IU", + "outputId": "7301a0b0-8bb7-4dff-bee4-a7f26dd4b00c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0]\n", + "The person does not have heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "input_data=(65\t,1,\t0\t,120\t,177\t,0,\t1\t,140,\t0,\t0.4,\t2,\t0,\t3)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")" + ], + "metadata": { + "id": "WCbZkDR7PCyB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d7158150-6fe9-4271-833f-d16e78d435c6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1]\n", + "the person has heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + } + ] +} \ No newline at end of file From bab1ac842093286f6452fe48342f6c4a145fa57b Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Sun, 12 May 2024 17:48:24 +0530 Subject: [PATCH 02/12] Created using Colab From 12512a12c361687d5adc6a900854b5402d999e7a Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Sun, 12 May 2024 17:56:07 +0530 Subject: [PATCH 03/12] Heart_Disease_Prediction.ipynb --- Heart_Disease_Prediction.ipynb | 2088 ++++---------------------------- 1 file changed, 252 insertions(+), 1836 deletions(-) diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb index f5d64999..a6e99747 100644 --- a/Heart_Disease_Prediction.ipynb +++ b/Heart_Disease_Prediction.ipynb @@ -1,1836 +1,252 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyMjJQcXr2muZKXGUdpyVh/h", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Importing the Dependencies\n" - ], - "metadata": { - "id": "Cj2SOXgaZt-Q" - } - }, - { - "cell_type": "code", - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import accuracy_score" - ], - "metadata": { - "id": "k850UGz1Z03B" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Data Collection and Processing\n" - ], - "metadata": { - "id": "APYsimt8bDoD" - } - }, - { - "cell_type": "code", - "source": [ - "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart_disease_data.csv')" - ], - "metadata": { - "id": "RJg3aA91Z0-u" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#print first 5 rows of the datase\n", - "heart_data.head()\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "BnoQ8u4hdZ8Z", - "outputId": "30199964-f01f-4d2c-9a9d-93c7c7ae6d72" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", - "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", - "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", - "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", - "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", - "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", - "\n", - " ca thal target \n", - "0 0 1 1 \n", - "1 0 2 1 \n", - "2 0 2 1 \n", - "3 0 2 1 \n", - "4 0 2 1 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "heart_data", - "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 303,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9,\n \"min\": 29,\n \"max\": 77,\n \"num_unique_values\": 41,\n \"samples\": [\n 46,\n 66,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 94,\n \"max\": 200,\n \"num_unique_values\": 49,\n \"samples\": [\n 104,\n 123\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 126,\n \"max\": 564,\n \"num_unique_values\": 152,\n \"samples\": [\n 277,\n 169\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 71,\n \"max\": 202,\n \"num_unique_values\": 91,\n \"samples\": [\n 159,\n 152\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1610750220686348,\n \"min\": 0.0,\n \"max\": 6.2,\n \"num_unique_values\": 40,\n \"samples\": [\n 1.9,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 4,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 4 - } - ] - }, - { - "cell_type": "code", - "source": [ - "heart_data.tail()\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "NQwDjwwGeBF4", - "outputId": "0ad96c3c-0609-4365-b667-16204c4087e4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", - "298 57 0 0 140 241 0 1 123 1 0.2 \n", - "299 45 1 3 110 264 0 1 132 0 1.2 \n", - "300 68 1 0 144 193 1 1 141 0 3.4 \n", - "301 57 1 0 130 131 0 1 115 1 1.2 \n", - "302 57 0 1 130 236 0 0 174 0 0.0 \n", - "\n", - " slope ca thal target \n", - "298 1 0 3 0 \n", - "299 1 0 3 0 \n", - "300 1 2 3 0 \n", - "301 1 1 3 0 \n", - "302 1 1 2 0 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8,\n \"min\": 45,\n \"max\": 68,\n \"num_unique_values\": 3,\n \"samples\": [\n 57,\n 45,\n 68\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 110,\n \"max\": 144,\n \"num_unique_values\": 4,\n \"samples\": [\n 110,\n 130\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52,\n \"min\": 131,\n \"max\": 264,\n \"num_unique_values\": 5,\n \"samples\": [\n 264,\n 236\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 115,\n \"max\": 174,\n \"num_unique_values\": 5,\n \"samples\": [\n 132,\n 174\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3490737563232043,\n \"min\": 0.0,\n \"max\": 3.4,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.2,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2,\n \"max\": 3,\n \"num_unique_values\": 2,\n \"samples\": [\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 5 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# number of rows and columns in the dataset\n", - "heart_data.shape\n", - "\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ye8LlTQVeHs1", - "outputId": "2ab0e7a8-c73f-4ad4-8e0f-fa3a4c18534e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(303, 14)" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# getting some info about the data\n", - "heart_data.info()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5YZLMOwFeXF3", - "outputId": "3267941d-8cae-4d36-c703-938e84bdb0f5" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "RangeIndex: 303 entries, 0 to 302\n", - "Data columns (total 14 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 age 303 non-null int64 \n", - " 1 sex 303 non-null int64 \n", - " 2 cp 303 non-null int64 \n", - " 3 trestbps 303 non-null int64 \n", - " 4 chol 303 non-null int64 \n", - " 5 fbs 303 non-null int64 \n", - " 6 restecg 303 non-null int64 \n", - " 7 thalach 303 non-null int64 \n", - " 8 exang 303 non-null int64 \n", - " 9 oldpeak 303 non-null float64\n", - " 10 slope 303 non-null int64 \n", - " 11 ca 303 non-null int64 \n", - " 12 thal 303 non-null int64 \n", - " 13 target 303 non-null int64 \n", - "dtypes: float64(1), int64(13)\n", - "memory usage: 33.3 KB\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "#checking for missing values\n", - "heart_data.isnull().sum()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QHazm2rze6Oj", - "outputId": "26e2d065-8f01-4306-c293-96e986b8a836" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "age 0\n", - "sex 0\n", - "cp 0\n", - "trestbps 0\n", - "chol 0\n", - "fbs 0\n", - "restecg 0\n", - "thalach 0\n", - "exang 0\n", - "oldpeak 0\n", - "slope 0\n", - "ca 0\n", - "thal 0\n", - "target 0\n", - "dtype: int64" - ] - }, - "metadata": {}, - "execution_count": 8 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# statistical measures about the data\n", - "heart_data.describe()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 320 - }, - "id": "nt15bvuYfBcA", - "outputId": "0d4fa9d8-d94f-4706-f640-416c9d3678fd" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " age sex cp trestbps chol fbs \\\n", - "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", - "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", - "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", - "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", - "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", - "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", - "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", - "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", - "\n", - " restecg thalach exang oldpeak slope ca \\\n", - "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", - "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", - "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", - "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", - "25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n", - "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", - "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", - "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", - "\n", - " thal target \n", - "count 303.000000 303.000000 \n", - "mean 2.313531 0.544554 \n", - "std 0.612277 0.498835 \n", - "min 0.000000 0.000000 \n", - "25% 2.000000 0.000000 \n", - "50% 2.000000 1.000000 \n", - "75% 3.000000 1.000000 \n", - "max 3.000000 1.000000 " - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
count303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000
mean54.3663370.6831680.966997131.623762246.2640260.1485150.528053149.6468650.3267331.0396041.3993400.7293732.3135310.544554
std9.0821010.4660111.03205217.53814351.8307510.3561980.52586022.9051610.4697941.1610750.6162261.0226060.6122770.498835
min29.0000000.0000000.00000094.000000126.0000000.0000000.00000071.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%47.5000000.0000000.000000120.000000211.0000000.0000000.000000133.5000000.0000000.0000001.0000000.0000002.0000000.000000
50%55.0000001.0000001.000000130.000000240.0000000.0000001.000000153.0000000.0000000.8000001.0000000.0000002.0000001.000000
75%61.0000001.0000002.000000140.000000274.5000000.0000001.000000166.0000001.0000001.6000002.0000001.0000003.0000001.000000
max77.0000001.0000003.000000200.000000564.0000001.0000002.000000202.0000001.0000006.2000002.0000004.0000003.0000001.000000
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92.63263171018461,\n \"min\": 9.082100989837857,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 54.366336633663366,\n 55.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.91793021099774,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.6831683168316832,\n 1.0,\n 0.46601082333962385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72725528212327,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 0.966996699669967,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.65195263865039,\n \"min\": 17.5381428135171,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 131.62376237623764,\n 130.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35806568851743,\n \"min\": 51.83075098793003,\n \"max\": 564.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 246.26402640264027,\n 240.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.0512286741478,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.1485148514851485,\n 1.0,\n 0.35619787492797644\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.8733588009897,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.528052805280528,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.70384393886218,\n \"min\": 22.905161114914094,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 149.64686468646866,\n 153.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.9862394088184,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.32673267326732675,\n 1.0,\n 0.4697944645223165\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.59952466080658,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 1.0396039603960396,\n 1.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72394469173834,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 1.3993399339933994,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.79372080487734,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.7293729372937293,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.47909774814387,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 2.3135313531353137,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.92326354929804,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.5445544554455446,\n 1.0,\n 0.4988347841643913\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 9 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# checking the distribution of Target Variable\n", - "heart_data['target'].value_counts()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NCbxYqqNf2-4", - "outputId": "9ec352f8-5bd5-4d8e-ae54-68962baa0851" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "1 165\n", - "0 138\n", - "Name: target, dtype: int64" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "1-->Defective heart\n", - "\n", - "0-->Healthy heart\n", - "\n", - "\n" - ], - "metadata": { - "id": "qWNMUL5_CrfC" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "Splitting the features and target" - ], - "metadata": { - "id": "McSs_6cTC8Ub" - } - }, - { - "cell_type": "code", - "source": [ - "x=heart_data.drop(columns='target', axis=1)\n", - "y=heart_data['target']" - ], - "metadata": { - "id": "oSgKSF5-DGVk" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(x)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zhiIhyMxDhWF", - "outputId": "caf4204e-9079-4a26-a933-b9c9f93f6dd9" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", - "0 63 1 3 145 233 1 0 150 0 2.3 \n", - "1 37 1 2 130 250 0 1 187 0 3.5 \n", - "2 41 0 1 130 204 0 0 172 0 1.4 \n", - "3 56 1 1 120 236 0 1 178 0 0.8 \n", - "4 57 0 0 120 354 0 1 163 1 0.6 \n", - ".. ... ... .. ... ... ... ... ... ... ... \n", - "298 57 0 0 140 241 0 1 123 1 0.2 \n", - "299 45 1 3 110 264 0 1 132 0 1.2 \n", - "300 68 1 0 144 193 1 1 141 0 3.4 \n", - "301 57 1 0 130 131 0 1 115 1 1.2 \n", - "302 57 0 1 130 236 0 0 174 0 0.0 \n", - "\n", - " slope ca thal \n", - "0 0 0 1 \n", - "1 0 0 2 \n", - "2 2 0 2 \n", - "3 2 0 2 \n", - "4 2 0 2 \n", - ".. ... .. ... \n", - "298 1 0 3 \n", - "299 1 0 3 \n", - "300 1 2 3 \n", - "301 1 1 3 \n", - "302 1 1 2 \n", - "\n", - "[303 rows x 13 columns]\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(y)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VpOvdXWgHWmI", - "outputId": "113ab45d-b6a3-45a1-912f-54a86cf21df2" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "0 1\n", - "1 1\n", - "2 1\n", - "3 1\n", - "4 1\n", - " ..\n", - "298 0\n", - "299 0\n", - "300 0\n", - "301 0\n", - "302 0\n", - "Name: target, Length: 303, dtype: int64\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Splitting data into Training data" - ], - "metadata": { - "id": "5A20XMHYII3T" - } - }, - { - "cell_type": "code", - "source": [ - "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" - ], - "metadata": { - "id": "SNK4hm8DIPSm" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(x.shape,x_train.shape,x_test.shape)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7OTKtdA-JLCV", - "outputId": "cb0304a8-2605-4f7e-8510-7b23a0fbd4dc" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(303, 13) (212, 13) (91, 13)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "MODEL TRAINING" - ], - "metadata": { - "id": "ne2RibQaJdNe" - } - }, - { - "cell_type": "markdown", - "source": [ - "LOGISTIC REGRESSION" - ], - "metadata": { - "id": "AUEblGtLJlzD" - } - }, - { - "cell_type": "code", - "source": [ - "model=LogisticRegression()" - ], - "metadata": { - "id": "k-IIz1pzJtRd" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Training the logistic regression model with training data\n", - "model.fit(x_train,y_train)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 233 - }, - "id": "kqFKrLzlJ0N0", - "outputId": "3bb02431-b194-4619-a85f-8fbe87e779e6" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", - "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", - "\n", - "Increase the number of iterations (max_iter) or scale the data as shown in:\n", - " https://scikit-learn.org/stable/modules/preprocessing.html\n", - "Please also refer to the documentation for alternative solver options:\n", - " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", - " n_iter_i = _check_optimize_result(\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "LogisticRegression()" - ], - "text/html": [ - "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ] - }, - "metadata": {}, - "execution_count": 17 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Model Evaluation\n", - "\n", - "Accuracy Score" - ], - "metadata": { - "id": "aPahD6MLKaPU" - } - }, - { - "cell_type": "code", - "source": [ - "#accuracy on training data\n", - "x_train_prediction=model.predict(x_train)\n", - "training_data_accuracy=accuracy_score(x_train_prediction,y_train)" - ], - "metadata": { - "id": "NHy61zdJKDR1" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print('Accuracy on Training data:',training_data_accuracy)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "J4XiNRwXLCXf", - "outputId": "dd55ea56-948e-4bc3-b98d-273431014230" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Accuracy on Training data: 0.8679245283018868\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "#accuracy on test data\n", - "x_test_prediction=model.predict(x_test)\n", - "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" - ], - "metadata": { - "id": "ehbFgWjhLK44" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print('Accuracy on Testing data:',testing_data_accuracy)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jYZIcbiVLs0G", - "outputId": "e7a36667-528e-42d1-e338-eeadc11c4947" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Accuracy on Testing data: 0.8021978021978022\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "BUILDING PREDICTING SYSTEM" - ], - "metadata": { - "id": "rec6Gz8vMP_G" - } - }, - { - "cell_type": "code", - "source": [ - "input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")\n", - "\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ky2mzQUgL9IU", - "outputId": "7301a0b0-8bb7-4dff-bee4-a7f26dd4b00c" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[0]\n", - "The person does not have heart disease\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "input_data=(65\t,1,\t0\t,120\t,177\t,0,\t1\t,140,\t0,\t0.4,\t2,\t0,\t3)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")" - ], - "metadata": { - "id": "WCbZkDR7PCyB", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d7158150-6fe9-4271-833f-d16e78d435c6" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[1]\n", - "the person has heart disease\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ] - } - ] -} \ No newline at end of file +import numpy as np +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score + +Data Collection and Processing + + +#loading the csv data to a Pandas DataFrame +heart_data= pd.read_csv('/content/heart_disease_data.csv') + + +#print first 5 rows of the datase +heart_data.head() + + +age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target +0 63 1 3 145 233 1 0 150 0 2.3 0 0 1 1 +1 37 1 2 130 250 0 1 187 0 3.5 0 0 2 1 +2 41 0 1 130 204 0 0 172 0 1.4 2 0 2 1 +3 56 1 1 120 236 0 1 178 0 0.8 2 0 2 1 +4 57 0 0 120 354 0 1 163 1 0.6 2 0 2 1 + +heart_data.tail() + + +age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target +298 57 0 0 140 241 0 1 123 1 0.2 1 0 3 0 +299 45 1 3 110 264 0 1 132 0 1.2 1 0 3 0 +300 68 1 0 144 193 1 1 141 0 3.4 1 2 3 0 +301 57 1 0 130 131 0 1 115 1 1.2 1 1 3 0 +302 57 0 1 130 236 0 0 174 0 0.0 1 1 2 0 + +# number of rows and columns in the dataset +heart_data.shape + + + +(303, 14) + +# getting some info about the data +heart_data.info() + + +RangeIndex: 303 entries, 0 to 302 +Data columns (total 14 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 age 303 non-null int64 + 1 sex 303 non-null int64 + 2 cp 303 non-null int64 + 3 trestbps 303 non-null int64 + 4 chol 303 non-null int64 + 5 fbs 303 non-null int64 + 6 restecg 303 non-null int64 + 7 thalach 303 non-null int64 + 8 exang 303 non-null int64 + 9 oldpeak 303 non-null float64 + 10 slope 303 non-null int64 + 11 ca 303 non-null int64 + 12 thal 303 non-null int64 + 13 target 303 non-null int64 +dtypes: float64(1), int64(13) +memory usage: 33.3 KB + +#checking for missing values +heart_data.isnull().sum() + +age 0 +sex 0 +cp 0 +trestbps 0 +chol 0 +fbs 0 +restecg 0 +thalach 0 +exang 0 +oldpeak 0 +slope 0 +ca 0 +thal 0 +target 0 +dtype: int64 + +# statistical measures about the data +heart_data.describe() + +age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target +count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 +mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 2.313531 0.544554 +std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 0.612277 0.498835 +min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 +25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 2.000000 0.000000 +50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 2.000000 1.000000 +75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 3.000000 1.000000 +max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 3.000000 1.000000 + +# checking the distribution of Target Variable +heart_data['target'].value_counts() + +1 165 +0 138 +Name: target, dtype: int64 +1-->Defective heart + +0-->Healthy heart + +Splitting the features and target + + +x=heart_data.drop(columns='target', axis=1) +y=heart_data['target'] + + +print(x) + + age sex cp trestbps chol fbs restecg thalach exang oldpeak \ +0 63 1 3 145 233 1 0 150 0 2.3 +1 37 1 2 130 250 0 1 187 0 3.5 +2 41 0 1 130 204 0 0 172 0 1.4 +3 56 1 1 120 236 0 1 178 0 0.8 +4 57 0 0 120 354 0 1 163 1 0.6 +.. ... ... .. ... ... ... ... ... ... ... +298 57 0 0 140 241 0 1 123 1 0.2 +299 45 1 3 110 264 0 1 132 0 1.2 +300 68 1 0 144 193 1 1 141 0 3.4 +301 57 1 0 130 131 0 1 115 1 1.2 +302 57 0 1 130 236 0 0 174 0 0.0 + + slope ca thal +0 0 0 1 +1 0 0 2 +2 2 0 2 +3 2 0 2 +4 2 0 2 +.. ... .. ... +298 1 0 3 +299 1 0 3 +300 1 2 3 +301 1 1 3 +302 1 1 2 + +[303 rows x 13 columns] + +print(y) + +0 1 +1 1 +2 1 +3 1 +4 1 + .. +298 0 +299 0 +300 0 +301 0 +302 0 +Name: target, Length: 303, dtype: int64 +Splitting data into Training data + + +x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3) + + +print(x.shape,x_train.shape,x_test.shape) + +(303, 13) (212, 13) (91, 13) +MODEL TRAINING + +LOGISTIC REGRESSION + + +model=LogisticRegression() + + +# Training the logistic regression model with training data +model.fit(x_train,y_train) + +/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1): +STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. + +Increase the number of iterations (max_iter) or scale the data as shown in: + https://scikit-learn.org/stable/modules/preprocessing.html +Please also refer to the documentation for alternative solver options: + https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression + n_iter_i = _check_optimize_result( +LogisticRegression() +In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. +On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. +Model Evaluation + +Accuracy Score + + +#accuracy on training data +x_train_prediction=model.predict(x_train) +training_data_accuracy=accuracy_score(x_train_prediction,y_train) + + +print('Accuracy on Training data:',training_data_accuracy) + +Accuracy on Training data: 0.8679245283018868 + +#accuracy on test data +x_test_prediction=model.predict(x_test) +testing_data_accuracy=accuracy_score(x_test_prediction,y_test) + + +print('Accuracy on Testing data:',testing_data_accuracy) + +Accuracy on Testing data: 0.8021978021978022 +BUILDING PREDICTING SYSTEM + + +input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3) +# change the input data into numpy array +input_data_as_numpy_array=np.asarray(input_data) +#reshape the numpy array as we are predicting for only on instance +input_data_reshaped =input_data_as_numpy_array.reshape(1,-1) +prediction=model.predict(input_data_reshaped) +print(prediction) +if (prediction[0]==0): + print("The person does not have heart disease") +else: + print("the person has heart disease") + + + +[0] +#Output +The person does not have heart disease +/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names + warnings.warn( + +input_data=(65 ,1, 0 ,120 ,177 ,0, 1 ,140, 0, 0.4, 2, 0, 3) +# change the input data into numpy array +input_data_as_numpy_array=np.asarray(input_data) +#reshape the numpy array as we are predicting for only on instance +input_data_reshaped =input_data_as_numpy_array.reshape(1,-1) +prediction=model.predict(input_data_reshaped) +print(prediction) +if (prediction[0]==0): + print("The person does not have heart disease") +else: + print("the person has heart disease") + +[1] +#Output +the person has heart disease +/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names + From f96f77d70b42895c83f2ea6b3c56d3e0fceab0ce Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Sun, 12 May 2024 17:57:36 +0530 Subject: [PATCH 04/12] Created using Colab --- Heart_Disease_Prediction.ipynb | 2090 ++++++++++++++++++++++++++++---- 1 file changed, 1838 insertions(+), 252 deletions(-) diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb index a6e99747..e84ac289 100644 --- a/Heart_Disease_Prediction.ipynb +++ b/Heart_Disease_Prediction.ipynb @@ -1,252 +1,1838 @@ -import numpy as np -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score - -Data Collection and Processing - - -#loading the csv data to a Pandas DataFrame -heart_data= pd.read_csv('/content/heart_disease_data.csv') - - -#print first 5 rows of the datase -heart_data.head() - - -age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target -0 63 1 3 145 233 1 0 150 0 2.3 0 0 1 1 -1 37 1 2 130 250 0 1 187 0 3.5 0 0 2 1 -2 41 0 1 130 204 0 0 172 0 1.4 2 0 2 1 -3 56 1 1 120 236 0 1 178 0 0.8 2 0 2 1 -4 57 0 0 120 354 0 1 163 1 0.6 2 0 2 1 - -heart_data.tail() - - -age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target -298 57 0 0 140 241 0 1 123 1 0.2 1 0 3 0 -299 45 1 3 110 264 0 1 132 0 1.2 1 0 3 0 -300 68 1 0 144 193 1 1 141 0 3.4 1 2 3 0 -301 57 1 0 130 131 0 1 115 1 1.2 1 1 3 0 -302 57 0 1 130 236 0 0 174 0 0.0 1 1 2 0 - -# number of rows and columns in the dataset -heart_data.shape - - - -(303, 14) - -# getting some info about the data -heart_data.info() - - -RangeIndex: 303 entries, 0 to 302 -Data columns (total 14 columns): - # Column Non-Null Count Dtype ---- ------ -------------- ----- - 0 age 303 non-null int64 - 1 sex 303 non-null int64 - 2 cp 303 non-null int64 - 3 trestbps 303 non-null int64 - 4 chol 303 non-null int64 - 5 fbs 303 non-null int64 - 6 restecg 303 non-null int64 - 7 thalach 303 non-null int64 - 8 exang 303 non-null int64 - 9 oldpeak 303 non-null float64 - 10 slope 303 non-null int64 - 11 ca 303 non-null int64 - 12 thal 303 non-null int64 - 13 target 303 non-null int64 -dtypes: float64(1), int64(13) -memory usage: 33.3 KB - -#checking for missing values -heart_data.isnull().sum() - -age 0 -sex 0 -cp 0 -trestbps 0 -chol 0 -fbs 0 -restecg 0 -thalach 0 -exang 0 -oldpeak 0 -slope 0 -ca 0 -thal 0 -target 0 -dtype: int64 - -# statistical measures about the data -heart_data.describe() - -age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal target -count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 -mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 2.313531 0.544554 -std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 0.612277 0.498835 -min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 2.000000 0.000000 -50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 2.000000 1.000000 -75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 3.000000 1.000000 -max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 3.000000 1.000000 - -# checking the distribution of Target Variable -heart_data['target'].value_counts() - -1 165 -0 138 -Name: target, dtype: int64 -1-->Defective heart - -0-->Healthy heart - -Splitting the features and target - - -x=heart_data.drop(columns='target', axis=1) -y=heart_data['target'] - - -print(x) - - age sex cp trestbps chol fbs restecg thalach exang oldpeak \ -0 63 1 3 145 233 1 0 150 0 2.3 -1 37 1 2 130 250 0 1 187 0 3.5 -2 41 0 1 130 204 0 0 172 0 1.4 -3 56 1 1 120 236 0 1 178 0 0.8 -4 57 0 0 120 354 0 1 163 1 0.6 -.. ... ... .. ... ... ... ... ... ... ... -298 57 0 0 140 241 0 1 123 1 0.2 -299 45 1 3 110 264 0 1 132 0 1.2 -300 68 1 0 144 193 1 1 141 0 3.4 -301 57 1 0 130 131 0 1 115 1 1.2 -302 57 0 1 130 236 0 0 174 0 0.0 - - slope ca thal -0 0 0 1 -1 0 0 2 -2 2 0 2 -3 2 0 2 -4 2 0 2 -.. ... .. ... -298 1 0 3 -299 1 0 3 -300 1 2 3 -301 1 1 3 -302 1 1 2 - -[303 rows x 13 columns] - -print(y) - -0 1 -1 1 -2 1 -3 1 -4 1 - .. -298 0 -299 0 -300 0 -301 0 -302 0 -Name: target, Length: 303, dtype: int64 -Splitting data into Training data - - -x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3) - - -print(x.shape,x_train.shape,x_test.shape) - -(303, 13) (212, 13) (91, 13) -MODEL TRAINING - -LOGISTIC REGRESSION - - -model=LogisticRegression() - - -# Training the logistic regression model with training data -model.fit(x_train,y_train) - -/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1): -STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. - -Increase the number of iterations (max_iter) or scale the data as shown in: - https://scikit-learn.org/stable/modules/preprocessing.html -Please also refer to the documentation for alternative solver options: - https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression - n_iter_i = _check_optimize_result( -LogisticRegression() -In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. -On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. -Model Evaluation - -Accuracy Score - - -#accuracy on training data -x_train_prediction=model.predict(x_train) -training_data_accuracy=accuracy_score(x_train_prediction,y_train) - - -print('Accuracy on Training data:',training_data_accuracy) - -Accuracy on Training data: 0.8679245283018868 - -#accuracy on test data -x_test_prediction=model.predict(x_test) -testing_data_accuracy=accuracy_score(x_test_prediction,y_test) - - -print('Accuracy on Testing data:',testing_data_accuracy) - -Accuracy on Testing data: 0.8021978021978022 -BUILDING PREDICTING SYSTEM - - -input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3) -# change the input data into numpy array -input_data_as_numpy_array=np.asarray(input_data) -#reshape the numpy array as we are predicting for only on instance -input_data_reshaped =input_data_as_numpy_array.reshape(1,-1) -prediction=model.predict(input_data_reshaped) -print(prediction) -if (prediction[0]==0): - print("The person does not have heart disease") -else: - print("the person has heart disease") - - - -[0] -#Output -The person does not have heart disease -/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names - warnings.warn( - -input_data=(65 ,1, 0 ,120 ,177 ,0, 1 ,140, 0, 0.4, 2, 0, 3) -# change the input data into numpy array -input_data_as_numpy_array=np.asarray(input_data) -#reshape the numpy array as we are predicting for only on instance -input_data_reshaped =input_data_as_numpy_array.reshape(1,-1) -prediction=model.predict(input_data_reshaped) -print(prediction) -if (prediction[0]==0): - print("The person does not have heart disease") -else: - print("the person has heart disease") - -[1] -#Output -the person has heart disease -/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names - +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNoiZVo9f/BO9xy8X6rbGMT", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Importing the Dependencies\n" + ], + "metadata": { + "id": "Cj2SOXgaZt-Q" + } + }, + { + "cell_type": "code", + "source": [ + "# @title\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ], + "metadata": { + "id": "k850UGz1Z03B", + "cellView": "form" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Data Collection and Processing\n" + ], + "metadata": { + "id": "APYsimt8bDoD" + } + }, + { + "cell_type": "code", + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart_disease_data.csv')" + ], + "metadata": { + "id": "RJg3aA91Z0-u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "BnoQ8u4hdZ8Z", + "outputId": "30199964-f01f-4d2c-9a9d-93c7c7ae6d72" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", + "\n", + " ca thal target \n", + "0 0 1 1 \n", + "1 0 2 1 \n", + "2 0 2 1 \n", + "3 0 2 1 \n", + "4 0 2 1 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 303,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9,\n \"min\": 29,\n \"max\": 77,\n \"num_unique_values\": 41,\n \"samples\": [\n 46,\n 66,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 94,\n \"max\": 200,\n \"num_unique_values\": 49,\n \"samples\": [\n 104,\n 123\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 126,\n \"max\": 564,\n \"num_unique_values\": 152,\n \"samples\": [\n 277,\n 169\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 71,\n \"max\": 202,\n \"num_unique_values\": 91,\n \"samples\": [\n 159,\n 152\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1610750220686348,\n \"min\": 0.0,\n \"max\": 6.2,\n \"num_unique_values\": 40,\n \"samples\": [\n 1.9,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 4,\n \"num_unique_values\": 5,\n \"samples\": [\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "heart_data.tail()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "NQwDjwwGeBF4", + "outputId": "0ad96c3c-0609-4365-b667-16204c4087e4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal target \n", + "298 1 0 3 0 \n", + "299 1 0 3 0 \n", + "300 1 2 3 0 \n", + "301 1 1 3 0 \n", + "302 1 1 2 0 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
29857001402410112310.21030
29945131102640113201.21030
30068101441931114103.41230
30157101301310111511.21130
30257011302360017400.01120
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8,\n \"min\": 45,\n \"max\": 68,\n \"num_unique_values\": 3,\n \"samples\": [\n 57,\n 45,\n 68\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 110,\n \"max\": 144,\n \"num_unique_values\": 4,\n \"samples\": [\n 110,\n 130\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52,\n \"min\": 131,\n \"max\": 264,\n \"num_unique_values\": 5,\n \"samples\": [\n 264,\n 236\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22,\n \"min\": 115,\n \"max\": 174,\n \"num_unique_values\": 5,\n \"samples\": [\n 132,\n 174\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3490737563232043,\n \"min\": 0.0,\n \"max\": 3.4,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.2,\n 0.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2,\n \"max\": 3,\n \"num_unique_values\": 2,\n \"samples\": [\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ye8LlTQVeHs1", + "outputId": "2ab0e7a8-c73f-4ad4-8e0f-fa3a4c18534e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(303, 14)" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# getting some info about the data\n", + "heart_data.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5YZLMOwFeXF3", + "outputId": "3267941d-8cae-4d36-c703-938e84bdb0f5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 303 entries, 0 to 302\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 age 303 non-null int64 \n", + " 1 sex 303 non-null int64 \n", + " 2 cp 303 non-null int64 \n", + " 3 trestbps 303 non-null int64 \n", + " 4 chol 303 non-null int64 \n", + " 5 fbs 303 non-null int64 \n", + " 6 restecg 303 non-null int64 \n", + " 7 thalach 303 non-null int64 \n", + " 8 exang 303 non-null int64 \n", + " 9 oldpeak 303 non-null float64\n", + " 10 slope 303 non-null int64 \n", + " 11 ca 303 non-null int64 \n", + " 12 thal 303 non-null int64 \n", + " 13 target 303 non-null int64 \n", + "dtypes: float64(1), int64(13)\n", + "memory usage: 33.3 KB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QHazm2rze6Oj", + "outputId": "26e2d065-8f01-4306-c293-96e986b8a836" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "age 0\n", + "sex 0\n", + "cp 0\n", + "trestbps 0\n", + "chol 0\n", + "fbs 0\n", + "restecg 0\n", + "thalach 0\n", + "exang 0\n", + "oldpeak 0\n", + "slope 0\n", + "ca 0\n", + "thal 0\n", + "target 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# statistical measures about the data\n", + "heart_data.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 320 + }, + "id": "nt15bvuYfBcA", + "outputId": "0d4fa9d8-d94f-4706-f640-416c9d3678fd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " age sex cp trestbps chol fbs \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", + "std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", + "min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", + "25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", + "50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", + "75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", + "max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", + "\n", + " restecg thalach exang oldpeak slope ca \\\n", + "count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", + "mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", + "std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", + "min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n", + "50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", + "75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", + "max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", + "\n", + " thal target \n", + "count 303.000000 303.000000 \n", + "mean 2.313531 0.544554 \n", + "std 0.612277 0.498835 \n", + "min 0.000000 0.000000 \n", + "25% 2.000000 0.000000 \n", + "50% 2.000000 1.000000 \n", + "75% 3.000000 1.000000 \n", + "max 3.000000 1.000000 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
count303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000
mean54.3663370.6831680.966997131.623762246.2640260.1485150.528053149.6468650.3267331.0396041.3993400.7293732.3135310.544554
std9.0821010.4660111.03205217.53814351.8307510.3561980.52586022.9051610.4697941.1610750.6162261.0226060.6122770.498835
min29.0000000.0000000.00000094.000000126.0000000.0000000.00000071.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%47.5000000.0000000.000000120.000000211.0000000.0000000.000000133.5000000.0000000.0000001.0000000.0000002.0000000.000000
50%55.0000001.0000001.000000130.000000240.0000000.0000001.000000153.0000000.0000000.8000001.0000000.0000002.0000001.000000
75%61.0000001.0000002.000000140.000000274.5000000.0000001.000000166.0000001.0000001.6000002.0000001.0000003.0000001.000000
max77.0000001.0000003.000000200.000000564.0000001.0000002.000000202.0000001.0000006.2000002.0000004.0000003.0000001.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92.63263171018461,\n \"min\": 9.082100989837857,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 54.366336633663366,\n 55.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.91793021099774,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.6831683168316832,\n 1.0,\n 0.46601082333962385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72725528212327,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 0.966996699669967,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.65195263865039,\n \"min\": 17.5381428135171,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 131.62376237623764,\n 130.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35806568851743,\n \"min\": 51.83075098793003,\n \"max\": 564.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 246.26402640264027,\n 240.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.0512286741478,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.1485148514851485,\n 1.0,\n 0.35619787492797644\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.8733588009897,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.528052805280528,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.70384393886218,\n \"min\": 22.905161114914094,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 149.64686468646866,\n 153.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.9862394088184,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.32673267326732675,\n 1.0,\n 0.4697944645223165\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.59952466080658,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 1.0396039603960396,\n 1.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72394469173834,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 1.3993399339933994,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.79372080487734,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.7293729372937293,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.47909774814387,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 2.3135313531353137,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.92326354929804,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.5445544554455446,\n 1.0,\n 0.4988347841643913\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['target'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NCbxYqqNf2-4", + "outputId": "9ec352f8-5bd5-4d8e-ae54-68962baa0851" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1 165\n", + "0 138\n", + "Name: target, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "1-->Defective heart\n", + "\n", + "0-->Healthy heart\n", + "\n", + "\n" + ], + "metadata": { + "id": "qWNMUL5_CrfC" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "Splitting the features and target" + ], + "metadata": { + "id": "McSs_6cTC8Ub" + } + }, + { + "cell_type": "code", + "source": [ + "x=heart_data.drop(columns='target', axis=1)\n", + "y=heart_data['target']" + ], + "metadata": { + "id": "oSgKSF5-DGVk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zhiIhyMxDhWF", + "outputId": "caf4204e-9079-4a26-a933-b9c9f93f6dd9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 \n", + ".. ... ... .. ... ... ... ... ... ... ... \n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal \n", + "0 0 0 1 \n", + "1 0 0 2 \n", + "2 2 0 2 \n", + "3 2 0 2 \n", + "4 2 0 2 \n", + ".. ... .. ... \n", + "298 1 0 3 \n", + "299 1 0 3 \n", + "300 1 2 3 \n", + "301 1 1 3 \n", + "302 1 1 2 \n", + "\n", + "[303 rows x 13 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VpOvdXWgHWmI", + "outputId": "113ab45d-b6a3-45a1-912f-54a86cf21df2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + " ..\n", + "298 0\n", + "299 0\n", + "300 0\n", + "301 0\n", + "302 0\n", + "Name: target, Length: 303, dtype: int64\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Splitting data into Training data" + ], + "metadata": { + "id": "5A20XMHYII3T" + } + }, + { + "cell_type": "code", + "source": [ + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ], + "metadata": { + "id": "SNK4hm8DIPSm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7OTKtdA-JLCV", + "outputId": "cb0304a8-2605-4f7e-8510-7b23a0fbd4dc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(303, 13) (212, 13) (91, 13)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "MODEL TRAINING" + ], + "metadata": { + "id": "ne2RibQaJdNe" + } + }, + { + "cell_type": "markdown", + "source": [ + "LOGISTIC REGRESSION" + ], + "metadata": { + "id": "AUEblGtLJlzD" + } + }, + { + "cell_type": "code", + "source": [ + "model=LogisticRegression()" + ], + "metadata": { + "id": "k-IIz1pzJtRd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training the logistic regression model with training data\n", + "model.fit(x_train,y_train)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 233 + }, + "id": "kqFKrLzlJ0N0", + "outputId": "3bb02431-b194-4619-a85f-8fbe87e779e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression()" + ], + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Model Evaluation\n", + "\n", + "Accuracy Score" + ], + "metadata": { + "id": "aPahD6MLKaPU" + } + }, + { + "cell_type": "code", + "source": [ + "#accuracy on training data\n", + "x_train_prediction=model.predict(x_train)\n", + "training_data_accuracy=accuracy_score(x_train_prediction,y_train)" + ], + "metadata": { + "id": "NHy61zdJKDR1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "J4XiNRwXLCXf", + "outputId": "dd55ea56-948e-4bc3-b98d-273431014230" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Training data: 0.8679245283018868\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#accuracy on test data\n", + "x_test_prediction=model.predict(x_test)\n", + "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" + ], + "metadata": { + "id": "ehbFgWjhLK44" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jYZIcbiVLs0G", + "outputId": "e7a36667-528e-42d1-e338-eeadc11c4947" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Testing data: 0.8021978021978022\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "BUILDING PREDICTING SYSTEM" + ], + "metadata": { + "id": "rec6Gz8vMP_G" + } + }, + { + "cell_type": "code", + "source": [ + "input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ky2mzQUgL9IU", + "outputId": "7301a0b0-8bb7-4dff-bee4-a7f26dd4b00c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0]\n", + "The person does not have heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "input_data=(65\t,1,\t0\t,120\t,177\t,0,\t1\t,140,\t0,\t0.4,\t2,\t0,\t3)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")" + ], + "metadata": { + "id": "WCbZkDR7PCyB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d7158150-6fe9-4271-833f-d16e78d435c6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1]\n", + "the person has heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + } + ] +} \ No newline at end of file From d5fe19a5d39dc39f3421212939e5fb94ce366022 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 10:20:01 +0530 Subject: [PATCH 05/12] Created using Colab --- Heart_Disease_Prediction.ipynb | 1400 +++++++++++++------------------- 1 file changed, 580 insertions(+), 820 deletions(-) diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb index e84ac289..2c9128e5 100644 --- a/Heart_Disease_Prediction.ipynb +++ b/Heart_Disease_Prediction.ipynb @@ -4,7 +4,7 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyNoiZVo9f/BO9xy8X6rbGMT", + "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf", "include_colab_link": true }, "kernelspec": { @@ -46,10 +46,9 @@ "from sklearn.metrics import accuracy_score" ], "metadata": { - "id": "k850UGz1Z03B", - "cellView": "form" + "id": "k850UGz1Z03B" }, - "execution_count": 1, + "execution_count": 212, "outputs": [] }, { @@ -65,12 +64,12 @@ "cell_type": "code", "source": [ "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart_disease_data.csv')" + "heart_data= pd.read_csv('/content/heart.csv')" ], "metadata": { "id": "RJg3aA91Z0-u" }, - "execution_count": null, + "execution_count": 213, "outputs": [] }, { @@ -82,34 +81,57 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 206 + "height": 394 }, "id": "BnoQ8u4hdZ8Z", - "outputId": "30199964-f01f-4d2c-9a9d-93c7c7ae6d72" + "outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7" }, - "execution_count": null, + "execution_count": 214, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", - "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", - "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", - "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", - "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", - "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", - "\n", - " ca thal target \n", - "0 0 1 1 \n", - "1 0 2 1 \n", - "2 0 2 1 \n", - "3 0 2 1 \n", - "4 0 2 1 " + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", + "0 BMW7812 67 Male 208 158/88 72 0 \n", + "1 CZE1114 21 Male 389 165/93 98 1 \n", + "2 BNI9906 21 Female 324 174/99 72 1 \n", + "3 JLN3497 84 Male 383 163/100 73 1 \n", + "4 GFO8847 66 Male 318 91/88 93 1 \n", + "\n", + " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", + "0 0 1 0 ... 6.615001 261404 \n", + "1 1 1 1 ... 4.963459 285768 \n", + "2 0 0 0 ... 9.463426 235282 \n", + "3 1 1 0 ... 7.648981 125640 \n", + "4 1 1 1 ... 1.514821 160555 \n", + "\n", + " BMI Triglycerides Physical Activity Days Per Week \\\n", + "0 31.251233 286 0 \n", + "1 27.194973 235 1 \n", + "2 28.176571 587 4 \n", + "3 36.464704 378 3 \n", + "4 21.809144 231 1 \n", + "\n", + " Sleep Hours Per Day Country Continent Hemisphere \\\n", + "0 6 Argentina South America Southern Hemisphere \n", + "1 7 Canada North America Northern Hemisphere \n", + "2 4 France Europe Northern Hemisphere \n", + "3 4 Canada North America Northern Hemisphere \n", + "4 5 Thailand Asia Northern Hemisphere \n", + "\n", + " Heart Attack Risk \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 26 columns]" ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
count303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000303.000000
mean54.3663370.6831680.966997131.623762246.2640260.1485150.528053149.6468650.3267331.0396041.3993400.7293732.3135310.544554
std9.0821010.4660111.03205217.53814351.8307510.3561980.52586022.9051610.4697941.1610750.6162261.0226060.6122770.498835
min29.0000000.0000000.00000094.000000126.0000000.0000000.00000071.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%47.5000000.0000000.000000120.000000211.0000000.0000000.000000133.5000000.0000000.0000001.0000000.0000002.0000000.000000
50%55.0000001.0000001.000000130.000000240.0000000.0000001.000000153.0000000.0000000.8000001.0000000.0000002.0000001.000000
75%61.0000001.0000002.000000140.000000274.5000000.0000001.000000166.0000001.0000001.6000002.0000001.0000003.0000001.000000
max77.0000001.0000003.000000200.000000564.0000001.0000002.000000202.0000001.0000006.2000002.0000004.0000003.0000001.000000
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "summary": "{\n \"name\": \"heart_data\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 92.63263171018461,\n \"min\": 9.082100989837857,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 54.366336633663366,\n 55.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.91793021099774,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.6831683168316832,\n 1.0,\n 0.46601082333962385\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72725528212327,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 0.966996699669967,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"trestbps\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 82.65195263865039,\n \"min\": 17.5381428135171,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 131.62376237623764,\n 130.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"chol\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35806568851743,\n \"min\": 51.83075098793003,\n \"max\": 564.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 246.26402640264027,\n 240.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fbs\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 107.0512286741478,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.1485148514851485,\n 1.0,\n 0.35619787492797644\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"restecg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.8733588009897,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.528052805280528,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thalach\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 83.70384393886218,\n \"min\": 22.905161114914094,\n \"max\": 303.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 149.64686468646866,\n 153.0,\n 303.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"exang\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.9862394088184,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.32673267326732675,\n 1.0,\n 0.4697944645223165\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"oldpeak\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.59952466080658,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 303.0,\n 1.0396039603960396,\n 1.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"slope\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.72394469173834,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 1.3993399339933994,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ca\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.79372080487734,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 0.7293729372937293,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"thal\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.47909774814387,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 303.0,\n 2.3135313531353137,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106.92326354929804,\n \"min\": 0.0,\n \"max\": 303.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.5445544554455446,\n 1.0,\n 0.4988347841643913\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" - } - }, - "metadata": {}, - "execution_count": 9 - } - ] + "execution_count": 219, + "outputs": [] }, { "cell_type": "code", "source": [ "# checking the distribution of Target Variable\n", - "heart_data['target'].value_counts()" + "heart_data['Heart Attack Risk'].value_counts()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NCbxYqqNf2-4", - "outputId": "9ec352f8-5bd5-4d8e-ae54-68962baa0851" + "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0" }, - "execution_count": null, + "execution_count": 220, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "1 165\n", - "0 138\n", - "Name: target, dtype: int64" + "Heart Attack Risk\n", + "0 5624\n", + "1 3139\n", + "Name: count, dtype: int64" ] }, "metadata": {}, - "execution_count": 10 + "execution_count": 220 } ] }, { "cell_type": "markdown", + "source": [], + "metadata": { + "id": "DvvKtsuILgK1" + } + }, + { + "cell_type": "code", "source": [ - "1-->Defective heart\n", - "\n", - "0-->Healthy heart\n", - "\n", - "\n" + "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" ], "metadata": { - "id": "qWNMUL5_CrfC" - } + "id": "mfiZ3MDvIiaV" + }, + "execution_count": 221, + "outputs": [] }, { "cell_type": "markdown", "source": [ + "1-->Defective heart\n", "\n", - "Splitting the features and target" + "0-->Healthy heart\n", + "\n", + "\n" ], "metadata": { - "id": "McSs_6cTC8Ub" + "id": "qWNMUL5_CrfC" } }, { "cell_type": "code", "source": [ - "x=heart_data.drop(columns='target', axis=1)\n", - "y=heart_data['target']" + "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", + "y=heart_data_num['Heart Attack Risk']\n" ], "metadata": { "id": "oSgKSF5-DGVk" }, - "execution_count": null, + "execution_count": 222, "outputs": [] }, { @@ -1467,41 +1194,80 @@ "base_uri": "https://localhost:8080/" }, "id": "zhiIhyMxDhWF", - "outputId": "caf4204e-9079-4a26-a933-b9c9f93f6dd9" + "outputId": "5684dc44-c814-4d81-e438-ecc544010d10" }, - "execution_count": null, + "execution_count": 223, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", - "0 63 1 3 145 233 1 0 150 0 2.3 \n", - "1 37 1 2 130 250 0 1 187 0 3.5 \n", - "2 41 0 1 130 204 0 0 172 0 1.4 \n", - "3 56 1 1 120 236 0 1 178 0 0.8 \n", - "4 57 0 0 120 354 0 1 163 1 0.6 \n", - ".. ... ... .. ... ... ... ... ... ... ... \n", - "298 57 0 0 140 241 0 1 123 1 0.2 \n", - "299 45 1 3 110 264 0 1 132 0 1.2 \n", - "300 68 1 0 144 193 1 1 141 0 3.4 \n", - "301 57 1 0 130 131 0 1 115 1 1.2 \n", - "302 57 0 1 130 236 0 0 174 0 0.0 \n", + " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", + "0 67 208 72 0 0 1 \n", + "1 21 389 98 1 1 1 \n", + "2 21 324 72 1 0 0 \n", + "3 84 383 73 1 1 1 \n", + "4 66 318 93 1 1 1 \n", + "... ... ... ... ... ... ... \n", + "8758 60 121 61 1 1 1 \n", + "8759 28 120 73 1 0 0 \n", + "8760 47 250 105 0 1 1 \n", + "8761 36 178 60 1 0 1 \n", + "8762 25 356 75 1 1 0 \n", + "\n", + " Obesity Alcohol Consumption Exercise Hours Per Week \\\n", + "0 0 0 4.168189 \n", + "1 1 1 1.813242 \n", + "2 0 0 2.078353 \n", + "3 0 1 9.828130 \n", + "4 1 0 5.804299 \n", + "... ... ... ... \n", + "8758 0 1 7.917342 \n", + "8759 1 0 16.558426 \n", + "8760 1 1 3.148438 \n", + "8761 0 0 3.789950 \n", + "8762 0 1 18.081748 \n", + "\n", + " Previous Heart Problems Medication Use Stress Level \\\n", + "0 0 0 9 \n", + "1 1 0 1 \n", + "2 1 1 9 \n", + "3 1 0 9 \n", + "4 1 0 6 \n", + "... ... ... ... \n", + "8758 1 1 8 \n", + "8759 0 0 8 \n", + "8760 1 0 5 \n", + "8761 1 1 5 \n", + "8762 0 0 8 \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "0 6.615001 261404 31.251233 286 \n", + "1 4.963459 285768 27.194973 235 \n", + "2 9.463426 235282 28.176571 587 \n", + "3 7.648981 125640 36.464704 378 \n", + "4 1.514821 160555 21.809144 231 \n", + "... ... ... ... ... \n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", "\n", - " slope ca thal \n", - "0 0 0 1 \n", - "1 0 0 2 \n", - "2 2 0 2 \n", - "3 2 0 2 \n", - "4 2 0 2 \n", - ".. ... .. ... \n", - "298 1 0 3 \n", - "299 1 0 3 \n", - "300 1 2 3 \n", - "301 1 1 3 \n", - "302 1 1 2 \n", + " Physical Activity Days Per Week Sleep Hours Per Day \n", + "0 0 6 \n", + "1 1 7 \n", + "2 4 4 \n", + "3 3 4 \n", + "4 1 5 \n", + "... ... ... \n", + "8758 7 7 \n", + "8759 4 9 \n", + "8760 4 4 \n", + "8761 2 8 \n", + "8762 7 4 \n", "\n", - "[303 rows x 13 columns]\n" + "[8763 rows x 18 columns]\n" ] } ] @@ -1516,26 +1282,26 @@ "base_uri": "https://localhost:8080/" }, "id": "VpOvdXWgHWmI", - "outputId": "113ab45d-b6a3-45a1-912f-54a86cf21df2" + "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4" }, - "execution_count": null, + "execution_count": 224, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "0 1\n", - "1 1\n", - "2 1\n", - "3 1\n", - "4 1\n", - " ..\n", - "298 0\n", - "299 0\n", - "300 0\n", - "301 0\n", - "302 0\n", - "Name: target, Length: 303, dtype: int64\n" + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "8758 0\n", + "8759 0\n", + "8760 1\n", + "8761 0\n", + "8762 1\n", + "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" ] } ] @@ -1552,33 +1318,67 @@ { "cell_type": "code", "source": [ + "\n", "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" ], "metadata": { "id": "SNK4hm8DIPSm" }, - "execution_count": null, + "execution_count": 225, "outputs": [] }, + { + "source": [ + "# Check the number of samples in x and y\n", + "print(f\"Number of samples in x: {len(x)}\")\n", + "print(f\"Number of samples in y: {len(y)}\")\n", + "\n", + "# If the number of samples is different, raise an error\n", + "if len(x) != len(y):\n", + " raise ValueError(\"Input arrays have different number of samples.\")\n", + "\n", + "# Proceed with train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" + ], + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GyZ6mljEHuVk", + "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f" + }, + "execution_count": 226, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of samples in x: 8763\n", + "Number of samples in y: 8763\n" + ] + } + ] + }, { "cell_type": "code", "source": [ "print(x.shape,x_train.shape,x_test.shape)" ], "metadata": { + "id": "7OTKtdA-JLCV", "colab": { "base_uri": "https://localhost:8080/" }, - "id": "7OTKtdA-JLCV", - "outputId": "cb0304a8-2605-4f7e-8510-7b23a0fbd4dc" + "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287" }, - "execution_count": null, + "execution_count": 227, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "(303, 13) (212, 13) (91, 13)\n" + "(8763, 18) (7010, 18) (1753, 18)\n" ] } ] @@ -1601,47 +1401,42 @@ "id": "AUEblGtLJlzD" } }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "OVQDdrIpHm8P" + }, + "execution_count": 227, + "outputs": [] + }, { "cell_type": "code", "source": [ - "model=LogisticRegression()" + "model1=LogisticRegression()" ], "metadata": { - "id": "k-IIz1pzJtRd" + "id": "w0CnNIPkHnTT" }, - "execution_count": null, + "execution_count": 228, "outputs": [] }, { "cell_type": "code", "source": [ "# Training the logistic regression model with training data\n", - "model.fit(x_train,y_train)" + "model1.fit(x_train,y_train)" ], "metadata": { + "id": "kr84EwGwHqGY", "colab": { "base_uri": "https://localhost:8080/", - "height": 233 + "height": 74 }, - "id": "kqFKrLzlJ0N0", - "outputId": "3bb02431-b194-4619-a85f-8fbe87e779e6" + "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593" }, - "execution_count": null, + "execution_count": 229, "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", - "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", - "\n", - "Increase the number of iterations (max_iter) or scale the data as shown in:\n", - " https://scikit-learn.org/stable/modules/preprocessing.html\n", - "Please also refer to the documentation for alternative solver options:\n", - " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", - " n_iter_i = _check_optimize_result(\n" - ] - }, { "output_type": "execute_result", "data": { @@ -1649,11 +1444,11 @@ "LogisticRegression()" ], "text/html": [ - "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ] }, "metadata": {}, - "execution_count": 17 + "execution_count": 229 } ] }, @@ -1668,17 +1463,24 @@ "id": "aPahD6MLKaPU" } }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "FE92kQzZHIIl" + } + }, { "cell_type": "code", "source": [ "#accuracy on training data\n", - "x_train_prediction=model.predict(x_train)\n", - "training_data_accuracy=accuracy_score(x_train_prediction,y_train)" + "x_train_prediction = model1.predict(x_train)\n", + "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" ], "metadata": { "id": "NHy61zdJKDR1" }, - "execution_count": null, + "execution_count": 230, "outputs": [] }, { @@ -1687,19 +1489,19 @@ "print('Accuracy on Training data:',training_data_accuracy)" ], "metadata": { + "id": "J4XiNRwXLCXf", "colab": { "base_uri": "https://localhost:8080/" }, - "id": "J4XiNRwXLCXf", - "outputId": "dd55ea56-948e-4bc3-b98d-273431014230" + "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1" }, - "execution_count": null, + "execution_count": 231, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Accuracy on Training data: 0.8679245283018868\n" + "Accuracy on Training data: 0.6417974322396577\n" ] } ] @@ -1708,13 +1510,13 @@ "cell_type": "code", "source": [ "#accuracy on test data\n", - "x_test_prediction=model.predict(x_test)\n", + "x_test_prediction=model1.predict(x_test)\n", "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" ], "metadata": { "id": "ehbFgWjhLK44" }, - "execution_count": null, + "execution_count": 232, "outputs": [] }, { @@ -1723,19 +1525,19 @@ "print('Accuracy on Testing data:',testing_data_accuracy)" ], "metadata": { + "id": "jYZIcbiVLs0G", "colab": { "base_uri": "https://localhost:8080/" }, - "id": "jYZIcbiVLs0G", - "outputId": "e7a36667-528e-42d1-e338-eeadc11c4947" + "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0" }, - "execution_count": null, + "execution_count": 233, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Accuracy on Testing data: 0.8021978021978022\n" + "Accuracy on Testing data: 0.6417569880205363\n" ] } ] @@ -1752,12 +1554,12 @@ { "cell_type": "code", "source": [ - "input_data=(44,0,0,130,60,0,0,131,1,2.2,1,3,3)\n", + "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", "# change the input data into numpy array\n", "input_data_as_numpy_array=np.asarray(input_data)\n", "#reshape the numpy array as we are predicting for only on instance\n", "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model.predict(input_data_reshaped)\n", + "prediction=model1.predict(input_data_reshaped)\n", "print(prediction)\n", "if (prediction[0]==0):\n", " print(\"The person does not have heart disease\")\n", @@ -1766,13 +1568,13 @@ "\n" ], "metadata": { + "id": "Ky2mzQUgL9IU", "colab": { "base_uri": "https://localhost:8080/" }, - "id": "Ky2mzQUgL9IU", - "outputId": "7301a0b0-8bb7-4dff-bee4-a7f26dd4b00c" + "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f" }, - "execution_count": null, + "execution_count": 234, "outputs": [ { "output_type": "stream", @@ -1791,48 +1593,6 @@ ] } ] - }, - { - "cell_type": "code", - "source": [ - "input_data=(65\t,1,\t0\t,120\t,177\t,0,\t1\t,140,\t0,\t0.4,\t2,\t0,\t3)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")" - ], - "metadata": { - "id": "WCbZkDR7PCyB", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d7158150-6fe9-4271-833f-d16e78d435c6" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[1]\n", - "the person has heart disease\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ] } ] } \ No newline at end of file From 0fc87b502c2eb7ce0ee7725860e02dd7d0be3954 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 10:23:11 +0530 Subject: [PATCH 06/12] Created using Colab From 4c291235af36c3dabb19532cba550dd6114e6709 Mon Sep 17 00:00:00 2001 From: Sudiksha18 Date: Mon, 13 May 2024 10:30:42 +0530 Subject: [PATCH 07/12] Heart_Disease_Prediction --- Heart_Disease_Prediction.ipynb | 614 ++++++++++++++++----------------- 1 file changed, 307 insertions(+), 307 deletions(-) diff --git a/Heart_Disease_Prediction.ipynb b/Heart_Disease_Prediction.ipynb index 2c9128e5..c506c25d 100644 --- a/Heart_Disease_Prediction.ipynb +++ b/Heart_Disease_Prediction.ipynb @@ -1,26 +1,10 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -28,15 +12,20 @@ }, { "cell_type": "markdown", - "source": [ - "Importing the Dependencies\n" - ], "metadata": { "id": "Cj2SOXgaZt-Q" - } + }, + "source": [ + "Importing the Dependencies\n" + ] }, { "cell_type": "code", + "execution_count": 212, + "metadata": { + "id": "k850UGz1Z03B" + }, + "outputs": [], "source": [ "# @title\n", "import numpy as np\n", @@ -44,40 +33,32 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score" - ], - "metadata": { - "id": "k850UGz1Z03B" - }, - "execution_count": 212, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Data Collection and Processing\n" - ], "metadata": { "id": "APYsimt8bDoD" - } + }, + "source": [ + "Data Collection and Processing\n" + ] }, { "cell_type": "code", - "source": [ - "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart.csv')" - ], + "execution_count": 213, "metadata": { "id": "RJg3aA91Z0-u" }, - "execution_count": 213, - "outputs": [] + "outputs": [], + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart.csv')" + ] }, { "cell_type": "code", - "source": [ - "#print first 5 rows of the datase\n", - "heart_data.head()\n" - ], + "execution_count": 214, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -86,49 +67,13 @@ "id": "BnoQ8u4hdZ8Z", "outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7" }, - "execution_count": 214, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", - "0 BMW7812 67 Male 208 158/88 72 0 \n", - "1 CZE1114 21 Male 389 165/93 98 1 \n", - "2 BNI9906 21 Female 324 174/99 72 1 \n", - "3 JLN3497 84 Male 383 163/100 73 1 \n", - "4 GFO8847 66 Male 318 91/88 93 1 \n", - "\n", - " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", - "0 0 1 0 ... 6.615001 261404 \n", - "1 1 1 1 ... 4.963459 285768 \n", - "2 0 0 0 ... 9.463426 235282 \n", - "3 1 1 0 ... 7.648981 125640 \n", - "4 1 1 1 ... 1.514821 160555 \n", - "\n", - " BMI Triglycerides Physical Activity Days Per Week \\\n", - "0 31.251233 286 0 \n", - "1 27.194973 235 1 \n", - "2 28.176571 587 4 \n", - "3 36.464704 378 3 \n", - "4 21.809144 231 1 \n", - "\n", - " Sleep Hours Per Day Country Continent Hemisphere \\\n", - "0 6 Argentina South America Southern Hemisphere \n", - "1 7 Canada North America Northern Hemisphere \n", - "2 4 France Europe Northern Hemisphere \n", - "3 4 Canada North America Northern Hemisphere \n", - "4 5 Thailand Asia Northern Hemisphere \n", - "\n", - " Heart Attack Risk \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "\n", - "[5 rows x 26 columns]" - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data" + }, "text/html": [ "\n", "
\n", @@ -508,21 +453,58 @@ "
\n", "
\n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "heart_data" - } + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", + "0 BMW7812 67 Male 208 158/88 72 0 \n", + "1 CZE1114 21 Male 389 165/93 98 1 \n", + "2 BNI9906 21 Female 324 174/99 72 1 \n", + "3 JLN3497 84 Male 383 163/100 73 1 \n", + "4 GFO8847 66 Male 318 91/88 93 1 \n", + "\n", + " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", + "0 0 1 0 ... 6.615001 261404 \n", + "1 1 1 1 ... 4.963459 285768 \n", + "2 0 0 0 ... 9.463426 235282 \n", + "3 1 1 0 ... 7.648981 125640 \n", + "4 1 1 1 ... 1.514821 160555 \n", + "\n", + " BMI Triglycerides Physical Activity Days Per Week \\\n", + "0 31.251233 286 0 \n", + "1 27.194973 235 1 \n", + "2 28.176571 587 4 \n", + "3 36.464704 378 3 \n", + "4 21.809144 231 1 \n", + "\n", + " Sleep Hours Per Day Country Continent Hemisphere \\\n", + "0 6 Argentina South America Southern Hemisphere \n", + "1 7 Canada North America Northern Hemisphere \n", + "2 4 France Europe Northern Hemisphere \n", + "3 4 Canada North America Northern Hemisphere \n", + "4 5 Thailand Asia Northern Hemisphere \n", + "\n", + " Heart Attack Risk \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 26 columns]" + ] }, + "execution_count": 214, "metadata": {}, - "execution_count": 214 + "output_type": "execute_result" } + ], + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" ] }, { "cell_type": "code", - "source": [ - "heart_data.tail()\n" - ], + "execution_count": 215, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -531,49 +513,12 @@ "id": "NQwDjwwGeBF4", "outputId": "60d69d34-5c6e-4975-c633-13cc786065f6" }, - "execution_count": 215, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", - "8758 MSV9918 60 Male 121 94/76 61 \n", - "8759 QSV6764 28 Female 120 157/102 73 \n", - "8760 XKA5925 47 Male 250 161/75 105 \n", - "8761 EPE6801 36 Male 178 119/67 60 \n", - "8762 ZWN9666 25 Female 356 138/67 75 \n", - "\n", - " Diabetes Family History Smoking Obesity ... \\\n", - "8758 1 1 1 0 ... \n", - "8759 1 0 0 1 ... \n", - "8760 0 1 1 1 ... \n", - "8761 1 0 1 0 ... \n", - "8762 1 1 0 0 ... \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", - "8758 7 7 Thailand \n", - "8759 4 9 Canada \n", - "8760 4 4 Brazil \n", - "8761 2 8 Brazil \n", - "8762 7 4 United Kingdom \n", - "\n", - " Continent Hemisphere Heart Attack Risk \n", - "8758 Asia Northern Hemisphere 0 \n", - "8759 North America Northern Hemisphere 0 \n", - "8760 South America Southern Hemisphere 1 \n", - "8761 South America Southern Hemisphere 0 \n", - "8762 Europe Northern Hemisphere 1 \n", - "\n", - "[5 rows x 26 columns]" - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -953,22 +898,57 @@ "
\n", " \n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe" - } + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", + "8758 MSV9918 60 Male 121 94/76 61 \n", + "8759 QSV6764 28 Female 120 157/102 73 \n", + "8760 XKA5925 47 Male 250 161/75 105 \n", + "8761 EPE6801 36 Male 178 119/67 60 \n", + "8762 ZWN9666 25 Female 356 138/67 75 \n", + "\n", + " Diabetes Family History Smoking Obesity ... \\\n", + "8758 1 1 1 0 ... \n", + "8759 1 0 0 1 ... \n", + "8760 0 1 1 1 ... \n", + "8761 1 0 1 0 ... \n", + "8762 1 1 0 0 ... \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", + "8758 7 7 Thailand \n", + "8759 4 9 Canada \n", + "8760 4 4 Brazil \n", + "8761 2 8 Brazil \n", + "8762 7 4 United Kingdom \n", + "\n", + " Continent Hemisphere Heart Attack Risk \n", + "8758 Asia Northern Hemisphere 0 \n", + "8759 North America Northern Hemisphere 0 \n", + "8760 South America Southern Hemisphere 1 \n", + "8761 South America Southern Hemisphere 0 \n", + "8762 Europe Northern Hemisphere 1 \n", + "\n", + "[5 rows x 26 columns]" + ] }, + "execution_count": 215, "metadata": {}, - "execution_count": 215 + "output_type": "execute_result" } + ], + "source": [ + "heart_data.tail()\n" ] }, { "cell_type": "code", - "source": [ - "# number of rows and columns in the dataset\n", - "heart_data.shape\n", - "\n" - ], + "execution_count": 216, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -976,26 +956,27 @@ "id": "Ye8LlTQVeHs1", "outputId": "e0fb1303-1000-45df-8cbf-49a3322f42b1" }, - "execution_count": 216, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "(8763, 26)" ] }, + "execution_count": 216, "metadata": {}, - "execution_count": 216 + "output_type": "execute_result" } + ], + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" ] }, { "cell_type": "code", - "source": [ - "# getting some info about the data\n", - "heart_data.info()" - ], + "execution_count": 217, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1003,11 +984,10 @@ "id": "5YZLMOwFeXF3", "outputId": "4a322f74-c893-45a0-e4e2-cca32e4d0bd3" }, - "execution_count": 217, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n", "RangeIndex: 8763 entries, 0 to 8762\n", @@ -1044,14 +1024,15 @@ "memory usage: 1.7+ MB\n" ] } + ], + "source": [ + "# getting some info about the data\n", + "heart_data.info()" ] }, { "cell_type": "code", - "source": [ - "#checking for missing values\n", - "heart_data.isnull().sum()" - ], + "execution_count": 218, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1059,10 +1040,8 @@ "id": "QHazm2rze6Oj", "outputId": "1601115a-a97a-4aa7-b0ca-1d94d09d11fa" }, - "execution_count": 218, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "Patient ID 0\n", @@ -1094,29 +1073,31 @@ "dtype: int64" ] }, + "execution_count": 218, "metadata": {}, - "execution_count": 218 + "output_type": "execute_result" } - ] + ], + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ] }, { "cell_type": "code", - "source": [ - "# statistical measures about the data\n", - "z=heart_data.describe()" - ], + "execution_count": 219, "metadata": { "id": "nt15bvuYfBcA" }, - "execution_count": 219, - "outputs": [] + "outputs": [], + "source": [ + "# statistical measures about the data\n", + "z=heart_data.describe()" + ] }, { "cell_type": "code", - "source": [ - "# checking the distribution of Target Variable\n", - "heart_data['Heart Attack Risk'].value_counts()" - ], + "execution_count": 220, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1124,10 +1105,8 @@ "id": "NCbxYqqNf2-4", "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0" }, - "execution_count": 220, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "Heart Attack Risk\n", @@ -1136,59 +1115,62 @@ "Name: count, dtype: int64" ] }, + "execution_count": 220, "metadata": {}, - "execution_count": 220 + "output_type": "execute_result" } + ], + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['Heart Attack Risk'].value_counts()" ] }, { "cell_type": "markdown", - "source": [], "metadata": { "id": "DvvKtsuILgK1" - } + }, + "source": [] }, { "cell_type": "code", - "source": [ - "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" - ], + "execution_count": 221, "metadata": { "id": "mfiZ3MDvIiaV" }, - "execution_count": 221, - "outputs": [] + "outputs": [], + "source": [ + "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "qWNMUL5_CrfC" + }, "source": [ "1-->Defective heart\n", "\n", "0-->Healthy heart\n", "\n", "\n" - ], - "metadata": { - "id": "qWNMUL5_CrfC" - } + ] }, { "cell_type": "code", - "source": [ - "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", - "y=heart_data_num['Heart Attack Risk']\n" - ], + "execution_count": 222, "metadata": { "id": "oSgKSF5-DGVk" }, - "execution_count": 222, - "outputs": [] + "outputs": [], + "source": [ + "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", + "y=heart_data_num['Heart Attack Risk']\n" + ] }, { "cell_type": "code", - "source": [ - "print(x)" - ], + "execution_count": 223, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1196,11 +1178,10 @@ "id": "zhiIhyMxDhWF", "outputId": "5684dc44-c814-4d81-e438-ecc544010d10" }, - "execution_count": 223, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", "0 67 208 72 0 0 1 \n", @@ -1270,13 +1251,14 @@ "[8763 rows x 18 columns]\n" ] } + ], + "source": [ + "print(x)" ] }, { "cell_type": "code", - "source": [ - "print(y)" - ], + "execution_count": 224, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1284,11 +1266,10 @@ "id": "VpOvdXWgHWmI", "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4" }, - "execution_count": 224, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "0 0\n", "1 0\n", @@ -1304,43 +1285,35 @@ "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" ] } + ], + "source": [ + "print(y)" ] }, { "cell_type": "markdown", - "source": [ - "Splitting data into Training data" - ], "metadata": { "id": "5A20XMHYII3T" - } + }, + "source": [ + "Splitting data into Training data" + ] }, { "cell_type": "code", - "source": [ - "\n", - "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" - ], + "execution_count": 225, "metadata": { "id": "SNK4hm8DIPSm" }, - "execution_count": 225, - "outputs": [] - }, - { + "outputs": [], "source": [ - "# Check the number of samples in x and y\n", - "print(f\"Number of samples in x: {len(x)}\")\n", - "print(f\"Number of samples in y: {len(y)}\")\n", "\n", - "# If the number of samples is different, raise an error\n", - "if len(x) != len(y):\n", - " raise ValueError(\"Input arrays have different number of samples.\")\n", - "\n", - "# Proceed with train_test_split\n", - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" - ], + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ] + }, + { "cell_type": "code", + "execution_count": 226, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1348,251 +1321,278 @@ "id": "GyZ6mljEHuVk", "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f" }, - "execution_count": 226, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Number of samples in x: 8763\n", "Number of samples in y: 8763\n" ] } + ], + "source": [ + "# Check the number of samples in x and y\n", + "print(f\"Number of samples in x: {len(x)}\")\n", + "print(f\"Number of samples in y: {len(y)}\")\n", + "\n", + "# If the number of samples is different, raise an error\n", + "if len(x) != len(y):\n", + " raise ValueError(\"Input arrays have different number of samples.\")\n", + "\n", + "# Proceed with train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" ] }, { "cell_type": "code", - "source": [ - "print(x.shape,x_train.shape,x_test.shape)" - ], + "execution_count": 227, "metadata": { - "id": "7OTKtdA-JLCV", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "7OTKtdA-JLCV", "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287" }, - "execution_count": 227, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "(8763, 18) (7010, 18) (1753, 18)\n" ] } + ], + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" ] }, { "cell_type": "markdown", - "source": [ - "MODEL TRAINING" - ], "metadata": { "id": "ne2RibQaJdNe" - } + }, + "source": [ + "MODEL TRAINING" + ] }, { "cell_type": "markdown", - "source": [ - "LOGISTIC REGRESSION" - ], "metadata": { "id": "AUEblGtLJlzD" - } + }, + "source": [ + "LOGISTIC REGRESSION" + ] }, { "cell_type": "code", - "source": [], + "execution_count": 227, "metadata": { "id": "OVQDdrIpHm8P" }, - "execution_count": 227, - "outputs": [] + "outputs": [], + "source": [] }, { "cell_type": "code", - "source": [ - "model1=LogisticRegression()" - ], + "execution_count": 228, "metadata": { "id": "w0CnNIPkHnTT" }, - "execution_count": 228, - "outputs": [] + "outputs": [], + "source": [ + "model1=LogisticRegression()" + ] }, { "cell_type": "code", - "source": [ - "# Training the logistic regression model with training data\n", - "model1.fit(x_train,y_train)" - ], + "execution_count": 229, "metadata": { - "id": "kr84EwGwHqGY", "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, + "id": "kr84EwGwHqGY", "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593" }, - "execution_count": 229, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - "LogisticRegression()" - ], "text/html": [ "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression()" ] }, + "execution_count": 229, "metadata": {}, - "execution_count": 229 + "output_type": "execute_result" } + ], + "source": [ + "# Training the logistic regression model with training data\n", + "model1.fit(x_train,y_train)" ] }, { "cell_type": "markdown", + "metadata": { + "id": "aPahD6MLKaPU" + }, "source": [ "Model Evaluation\n", "\n", "Accuracy Score" - ], - "metadata": { - "id": "aPahD6MLKaPU" - } + ] }, { "cell_type": "markdown", - "source": [], "metadata": { "id": "FE92kQzZHIIl" - } + }, + "source": [] }, { "cell_type": "code", + "execution_count": 230, + "metadata": { + "id": "NHy61zdJKDR1" + }, + "outputs": [], "source": [ "#accuracy on training data\n", "x_train_prediction = model1.predict(x_train)\n", "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" - ], - "metadata": { - "id": "NHy61zdJKDR1" - }, - "execution_count": 230, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "print('Accuracy on Training data:',training_data_accuracy)" - ], + "execution_count": 231, "metadata": { - "id": "J4XiNRwXLCXf", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "J4XiNRwXLCXf", "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1" }, - "execution_count": 231, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Accuracy on Training data: 0.6417974322396577\n" ] } + ], + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" ] }, { "cell_type": "code", + "execution_count": 232, + "metadata": { + "id": "ehbFgWjhLK44" + }, + "outputs": [], "source": [ "#accuracy on test data\n", "x_test_prediction=model1.predict(x_test)\n", "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" - ], - "metadata": { - "id": "ehbFgWjhLK44" - }, - "execution_count": 232, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "print('Accuracy on Testing data:',testing_data_accuracy)" - ], + "execution_count": 233, "metadata": { - "id": "jYZIcbiVLs0G", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "jYZIcbiVLs0G", "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0" }, - "execution_count": 233, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Accuracy on Testing data: 0.6417569880205363\n" ] } + ], + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" ] }, { "cell_type": "markdown", - "source": [ - "BUILDING PREDICTING SYSTEM" - ], "metadata": { "id": "rec6Gz8vMP_G" - } + }, + "source": [ + "BUILDING PREDICTING SYSTEM" + ] }, { "cell_type": "code", - "source": [ - "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model1.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")\n", - "\n" - ], + "execution_count": 234, "metadata": { - "id": "Ky2mzQUgL9IU", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "Ky2mzQUgL9IU", "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f" }, - "execution_count": 234, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "[0]\n", "The person does not have heart disease\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] } + ], + "source": [ + "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model1.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" ] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf", + "include_colab_link": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From f185b64e9b7744cce5046580ceb1eb02f779ec88 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 10:42:53 +0530 Subject: [PATCH 08/12] Heart_Disease_Prediction This is an heart disease prediction model which is made by Logistic Regression and python libraries like numpy , pandas. It also consist of machine learning libraries also like sklearn. --- Heart_Disease_Prediction (2).ipynb | 1586 ++++++++++++++++++++++++++++ 1 file changed, 1586 insertions(+) create mode 100644 Heart_Disease_Prediction (2).ipynb diff --git a/Heart_Disease_Prediction (2).ipynb b/Heart_Disease_Prediction (2).ipynb new file mode 100644 index 00000000..6acdbb30 --- /dev/null +++ b/Heart_Disease_Prediction (2).ipynb @@ -0,0 +1,1586 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Importing the Dependencies\n" + ], + "metadata": { + "id": "Cj2SOXgaZt-Q" + } + }, + { + "cell_type": "code", + "source": [ + "# @title\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ], + "metadata": { + "id": "k850UGz1Z03B" + }, + "execution_count": 212, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Data Collection and Processing\n" + ], + "metadata": { + "id": "APYsimt8bDoD" + } + }, + { + "cell_type": "code", + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart.csv')" + ], + "metadata": { + "id": "RJg3aA91Z0-u" + }, + "execution_count": 213, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 394 + }, + "id": "BnoQ8u4hdZ8Z", + "outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7" + }, + "execution_count": 214, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", + "0 BMW7812 67 Male 208 158/88 72 0 \n", + "1 CZE1114 21 Male 389 165/93 98 1 \n", + "2 BNI9906 21 Female 324 174/99 72 1 \n", + "3 JLN3497 84 Male 383 163/100 73 1 \n", + "4 GFO8847 66 Male 318 91/88 93 1 \n", + "\n", + " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", + "0 0 1 0 ... 6.615001 261404 \n", + "1 1 1 1 ... 4.963459 285768 \n", + "2 0 0 0 ... 9.463426 235282 \n", + "3 1 1 0 ... 7.648981 125640 \n", + "4 1 1 1 ... 1.514821 160555 \n", + "\n", + " BMI Triglycerides Physical Activity Days Per Week \\\n", + "0 31.251233 286 0 \n", + "1 27.194973 235 1 \n", + "2 28.176571 587 4 \n", + "3 36.464704 378 3 \n", + "4 21.809144 231 1 \n", + "\n", + " Sleep Hours Per Day Country Continent Hemisphere \\\n", + "0 6 Argentina South America Southern Hemisphere \n", + "1 7 Canada North America Northern Hemisphere \n", + "2 4 France Europe Northern Hemisphere \n", + "3 4 Canada North America Northern Hemisphere \n", + "4 5 Thailand Asia Northern Hemisphere \n", + "\n", + " Heart Attack Risk \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 26 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
0BMW781267Male208158/88720010...6.61500126140431.25123328606ArgentinaSouth AmericaSouthern Hemisphere0
1CZE111421Male389165/93981111...4.96345928576827.19497323517CanadaNorth AmericaNorthern Hemisphere0
2BNI990621Female324174/99721000...9.46342623528228.17657158744FranceEuropeNorthern Hemisphere0
3JLN349784Male383163/100731110...7.64898112564036.46470437834CanadaNorth AmericaNorthern Hemisphere0
4GFO884766Male31891/88931111...1.51482116055521.80914423115ThailandAsiaNorthern Hemisphere0
\n", + "

5 rows × 26 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data" + } + }, + "metadata": {}, + "execution_count": 214 + } + ] + }, + { + "cell_type": "code", + "source": [ + "heart_data.tail()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 394 + }, + "id": "NQwDjwwGeBF4", + "outputId": "60d69d34-5c6e-4975-c633-13cc786065f6" + }, + "execution_count": 215, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", + "8758 MSV9918 60 Male 121 94/76 61 \n", + "8759 QSV6764 28 Female 120 157/102 73 \n", + "8760 XKA5925 47 Male 250 161/75 105 \n", + "8761 EPE6801 36 Male 178 119/67 60 \n", + "8762 ZWN9666 25 Female 356 138/67 75 \n", + "\n", + " Diabetes Family History Smoking Obesity ... \\\n", + "8758 1 1 1 0 ... \n", + "8759 1 0 0 1 ... \n", + "8760 0 1 1 1 ... \n", + "8761 1 0 1 0 ... \n", + "8762 1 1 0 0 ... \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", + "8758 7 7 Thailand \n", + "8759 4 9 Canada \n", + "8760 4 4 Brazil \n", + "8761 2 8 Brazil \n", + "8762 7 4 United Kingdom \n", + "\n", + " Continent Hemisphere Heart Attack Risk \n", + "8758 Asia Northern Hemisphere 0 \n", + "8759 North America Northern Hemisphere 0 \n", + "8760 South America Southern Hemisphere 1 \n", + "8761 South America Southern Hemisphere 0 \n", + "8762 Europe Northern Hemisphere 1 \n", + "\n", + "[5 rows x 26 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
8758MSV991860Male12194/76611110...10.80637323542019.6558956777ThailandAsiaNorthern Hemisphere0
8759QSV676428Female120157/102731001...3.83303821788123.99386661749CanadaNorth AmericaNorthern Hemisphere0
8760XKA592547Male250161/751050111...2.3752143699835.40614652744BrazilSouth AmericaSouthern Hemisphere1
8761EPE680136Male178119/67601010...0.02910420994327.29402011428BrazilSouth AmericaSouthern Hemisphere0
8762ZWN966625Female356138/67751100...9.00523424733832.91415118074United KingdomEuropeNorthern Hemisphere1
\n", + "

5 rows × 26 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe" + } + }, + "metadata": {}, + "execution_count": 215 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ye8LlTQVeHs1", + "outputId": "e0fb1303-1000-45df-8cbf-49a3322f42b1" + }, + "execution_count": 216, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(8763, 26)" + ] + }, + "metadata": {}, + "execution_count": 216 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# getting some info about the data\n", + "heart_data.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5YZLMOwFeXF3", + "outputId": "4a322f74-c893-45a0-e4e2-cca32e4d0bd3" + }, + "execution_count": 217, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 8763 entries, 0 to 8762\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Patient ID 8763 non-null object \n", + " 1 Age 8763 non-null int64 \n", + " 2 Sex 8763 non-null object \n", + " 3 Cholesterol 8763 non-null int64 \n", + " 4 Blood Pressure 8763 non-null object \n", + " 5 Heart Rate 8763 non-null int64 \n", + " 6 Diabetes 8763 non-null int64 \n", + " 7 Family History 8763 non-null int64 \n", + " 8 Smoking 8763 non-null int64 \n", + " 9 Obesity 8763 non-null int64 \n", + " 10 Alcohol Consumption 8763 non-null int64 \n", + " 11 Exercise Hours Per Week 8763 non-null float64\n", + " 12 Diet 8763 non-null object \n", + " 13 Previous Heart Problems 8763 non-null int64 \n", + " 14 Medication Use 8763 non-null int64 \n", + " 15 Stress Level 8763 non-null int64 \n", + " 16 Sedentary Hours Per Day 8763 non-null float64\n", + " 17 Income 8763 non-null int64 \n", + " 18 BMI 8763 non-null float64\n", + " 19 Triglycerides 8763 non-null int64 \n", + " 20 Physical Activity Days Per Week 8763 non-null int64 \n", + " 21 Sleep Hours Per Day 8763 non-null int64 \n", + " 22 Country 8763 non-null object \n", + " 23 Continent 8763 non-null object \n", + " 24 Hemisphere 8763 non-null object \n", + " 25 Heart Attack Risk 8763 non-null int64 \n", + "dtypes: float64(3), int64(16), object(7)\n", + "memory usage: 1.7+ MB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QHazm2rze6Oj", + "outputId": "1601115a-a97a-4aa7-b0ca-1d94d09d11fa" + }, + "execution_count": 218, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Patient ID 0\n", + "Age 0\n", + "Sex 0\n", + "Cholesterol 0\n", + "Blood Pressure 0\n", + "Heart Rate 0\n", + "Diabetes 0\n", + "Family History 0\n", + "Smoking 0\n", + "Obesity 0\n", + "Alcohol Consumption 0\n", + "Exercise Hours Per Week 0\n", + "Diet 0\n", + "Previous Heart Problems 0\n", + "Medication Use 0\n", + "Stress Level 0\n", + "Sedentary Hours Per Day 0\n", + "Income 0\n", + "BMI 0\n", + "Triglycerides 0\n", + "Physical Activity Days Per Week 0\n", + "Sleep Hours Per Day 0\n", + "Country 0\n", + "Continent 0\n", + "Hemisphere 0\n", + "Heart Attack Risk 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 218 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# statistical measures about the data\n", + "z=heart_data.describe()" + ], + "metadata": { + "id": "nt15bvuYfBcA" + }, + "execution_count": 219, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['Heart Attack Risk'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NCbxYqqNf2-4", + "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0" + }, + "execution_count": 220, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Heart Attack Risk\n", + "0 5624\n", + "1 3139\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 220 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "DvvKtsuILgK1" + } + }, + { + "cell_type": "code", + "source": [ + "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" + ], + "metadata": { + "id": "mfiZ3MDvIiaV" + }, + "execution_count": 221, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "1-->Defective heart\n", + "\n", + "0-->Healthy heart\n", + "\n", + "\n" + ], + "metadata": { + "id": "qWNMUL5_CrfC" + } + }, + { + "cell_type": "code", + "source": [ + "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", + "y=heart_data_num['Heart Attack Risk']\n" + ], + "metadata": { + "id": "oSgKSF5-DGVk" + }, + "execution_count": 222, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zhiIhyMxDhWF", + "outputId": "5684dc44-c814-4d81-e438-ecc544010d10" + }, + "execution_count": 223, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", + "0 67 208 72 0 0 1 \n", + "1 21 389 98 1 1 1 \n", + "2 21 324 72 1 0 0 \n", + "3 84 383 73 1 1 1 \n", + "4 66 318 93 1 1 1 \n", + "... ... ... ... ... ... ... \n", + "8758 60 121 61 1 1 1 \n", + "8759 28 120 73 1 0 0 \n", + "8760 47 250 105 0 1 1 \n", + "8761 36 178 60 1 0 1 \n", + "8762 25 356 75 1 1 0 \n", + "\n", + " Obesity Alcohol Consumption Exercise Hours Per Week \\\n", + "0 0 0 4.168189 \n", + "1 1 1 1.813242 \n", + "2 0 0 2.078353 \n", + "3 0 1 9.828130 \n", + "4 1 0 5.804299 \n", + "... ... ... ... \n", + "8758 0 1 7.917342 \n", + "8759 1 0 16.558426 \n", + "8760 1 1 3.148438 \n", + "8761 0 0 3.789950 \n", + "8762 0 1 18.081748 \n", + "\n", + " Previous Heart Problems Medication Use Stress Level \\\n", + "0 0 0 9 \n", + "1 1 0 1 \n", + "2 1 1 9 \n", + "3 1 0 9 \n", + "4 1 0 6 \n", + "... ... ... ... \n", + "8758 1 1 8 \n", + "8759 0 0 8 \n", + "8760 1 0 5 \n", + "8761 1 1 5 \n", + "8762 0 0 8 \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "0 6.615001 261404 31.251233 286 \n", + "1 4.963459 285768 27.194973 235 \n", + "2 9.463426 235282 28.176571 587 \n", + "3 7.648981 125640 36.464704 378 \n", + "4 1.514821 160555 21.809144 231 \n", + "... ... ... ... ... \n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day \n", + "0 0 6 \n", + "1 1 7 \n", + "2 4 4 \n", + "3 3 4 \n", + "4 1 5 \n", + "... ... ... \n", + "8758 7 7 \n", + "8759 4 9 \n", + "8760 4 4 \n", + "8761 2 8 \n", + "8762 7 4 \n", + "\n", + "[8763 rows x 18 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VpOvdXWgHWmI", + "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4" + }, + "execution_count": 224, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "8758 0\n", + "8759 0\n", + "8760 1\n", + "8761 0\n", + "8762 1\n", + "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Splitting data into Training data" + ], + "metadata": { + "id": "5A20XMHYII3T" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ], + "metadata": { + "id": "SNK4hm8DIPSm" + }, + "execution_count": 225, + "outputs": [] + }, + { + "source": [ + "# Check the number of samples in x and y\n", + "print(f\"Number of samples in x: {len(x)}\")\n", + "print(f\"Number of samples in y: {len(y)}\")\n", + "\n", + "# If the number of samples is different, raise an error\n", + "if len(x) != len(y):\n", + " raise ValueError(\"Input arrays have different number of samples.\")\n", + "\n", + "# Proceed with train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" + ], + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GyZ6mljEHuVk", + "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f" + }, + "execution_count": 226, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of samples in x: 8763\n", + "Number of samples in y: 8763\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" + ], + "metadata": { + "id": "7OTKtdA-JLCV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287" + }, + "execution_count": 227, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(8763, 18) (7010, 18) (1753, 18)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "MODEL TRAINING" + ], + "metadata": { + "id": "ne2RibQaJdNe" + } + }, + { + "cell_type": "markdown", + "source": [ + "LOGISTIC REGRESSION" + ], + "metadata": { + "id": "AUEblGtLJlzD" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "OVQDdrIpHm8P" + }, + "execution_count": 227, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model1=LogisticRegression()" + ], + "metadata": { + "id": "w0CnNIPkHnTT" + }, + "execution_count": 228, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training the logistic regression model with training data\n", + "model1.fit(x_train,y_train)" + ], + "metadata": { + "id": "kr84EwGwHqGY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593" + }, + "execution_count": 229, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression()" + ], + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 229 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Model Evaluation\n", + "\n", + "Accuracy Score" + ], + "metadata": { + "id": "aPahD6MLKaPU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "FE92kQzZHIIl" + } + }, + { + "cell_type": "code", + "source": [ + "#accuracy on training data\n", + "x_train_prediction = model1.predict(x_train)\n", + "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" + ], + "metadata": { + "id": "NHy61zdJKDR1" + }, + "execution_count": 230, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" + ], + "metadata": { + "id": "J4XiNRwXLCXf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1" + }, + "execution_count": 231, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Training data: 0.6417974322396577\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#accuracy on test data\n", + "x_test_prediction=model1.predict(x_test)\n", + "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" + ], + "metadata": { + "id": "ehbFgWjhLK44" + }, + "execution_count": 232, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" + ], + "metadata": { + "id": "jYZIcbiVLs0G", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0" + }, + "execution_count": 233, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Testing data: 0.6417569880205363\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "BUILDING PREDICTING SYSTEM" + ], + "metadata": { + "id": "rec6Gz8vMP_G" + } + }, + { + "cell_type": "code", + "source": [ + "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model1.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" + ], + "metadata": { + "id": "Ky2mzQUgL9IU", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f" + }, + "execution_count": 234, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0]\n", + "The person does not have heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + } + ] +} \ No newline at end of file From e0f379deddb72f711ed4f5964beca8f40cb271c1 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 10:56:43 +0530 Subject: [PATCH 09/12] Heart_Disease_Prediction This is an heart disease prediction model which is made by Logistic Regression and python libraries like numpy , pandas. It also consist of machine learning libraries also like sklearn. --- Heart_Disease_Prediction (2).ipynb | 606 ++++++++++++++--------------- 1 file changed, 303 insertions(+), 303 deletions(-) diff --git a/Heart_Disease_Prediction (2).ipynb b/Heart_Disease_Prediction (2).ipynb index 6acdbb30..314b9652 100644 --- a/Heart_Disease_Prediction (2).ipynb +++ b/Heart_Disease_Prediction (2).ipynb @@ -1,30 +1,21 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", - "source": [ - "Importing the Dependencies\n" - ], "metadata": { "id": "Cj2SOXgaZt-Q" - } + }, + "source": [ + "Importing the Dependencies\n" + ] }, { "cell_type": "code", + "execution_count": 212, + "metadata": { + "id": "k850UGz1Z03B" + }, + "outputs": [], "source": [ "# @title\n", "import numpy as np\n", @@ -32,40 +23,32 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score" - ], - "metadata": { - "id": "k850UGz1Z03B" - }, - "execution_count": 212, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Data Collection and Processing\n" - ], "metadata": { "id": "APYsimt8bDoD" - } + }, + "source": [ + "Data Collection and Processing\n" + ] }, { "cell_type": "code", - "source": [ - "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart.csv')" - ], + "execution_count": 213, "metadata": { "id": "RJg3aA91Z0-u" }, - "execution_count": 213, - "outputs": [] + "outputs": [], + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart.csv')" + ] }, { "cell_type": "code", - "source": [ - "#print first 5 rows of the datase\n", - "heart_data.head()\n" - ], + "execution_count": 214, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -74,49 +57,13 @@ "id": "BnoQ8u4hdZ8Z", "outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7" }, - "execution_count": 214, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", - "0 BMW7812 67 Male 208 158/88 72 0 \n", - "1 CZE1114 21 Male 389 165/93 98 1 \n", - "2 BNI9906 21 Female 324 174/99 72 1 \n", - "3 JLN3497 84 Male 383 163/100 73 1 \n", - "4 GFO8847 66 Male 318 91/88 93 1 \n", - "\n", - " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", - "0 0 1 0 ... 6.615001 261404 \n", - "1 1 1 1 ... 4.963459 285768 \n", - "2 0 0 0 ... 9.463426 235282 \n", - "3 1 1 0 ... 7.648981 125640 \n", - "4 1 1 1 ... 1.514821 160555 \n", - "\n", - " BMI Triglycerides Physical Activity Days Per Week \\\n", - "0 31.251233 286 0 \n", - "1 27.194973 235 1 \n", - "2 28.176571 587 4 \n", - "3 36.464704 378 3 \n", - "4 21.809144 231 1 \n", - "\n", - " Sleep Hours Per Day Country Continent Hemisphere \\\n", - "0 6 Argentina South America Southern Hemisphere \n", - "1 7 Canada North America Northern Hemisphere \n", - "2 4 France Europe Northern Hemisphere \n", - "3 4 Canada North America Northern Hemisphere \n", - "4 5 Thailand Asia Northern Hemisphere \n", - "\n", - " Heart Attack Risk \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "\n", - "[5 rows x 26 columns]" - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data" + }, "text/html": [ "\n", "
\n", @@ -496,21 +443,58 @@ "
\n", " \n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "heart_data" - } + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", + "0 BMW7812 67 Male 208 158/88 72 0 \n", + "1 CZE1114 21 Male 389 165/93 98 1 \n", + "2 BNI9906 21 Female 324 174/99 72 1 \n", + "3 JLN3497 84 Male 383 163/100 73 1 \n", + "4 GFO8847 66 Male 318 91/88 93 1 \n", + "\n", + " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", + "0 0 1 0 ... 6.615001 261404 \n", + "1 1 1 1 ... 4.963459 285768 \n", + "2 0 0 0 ... 9.463426 235282 \n", + "3 1 1 0 ... 7.648981 125640 \n", + "4 1 1 1 ... 1.514821 160555 \n", + "\n", + " BMI Triglycerides Physical Activity Days Per Week \\\n", + "0 31.251233 286 0 \n", + "1 27.194973 235 1 \n", + "2 28.176571 587 4 \n", + "3 36.464704 378 3 \n", + "4 21.809144 231 1 \n", + "\n", + " Sleep Hours Per Day Country Continent Hemisphere \\\n", + "0 6 Argentina South America Southern Hemisphere \n", + "1 7 Canada North America Northern Hemisphere \n", + "2 4 France Europe Northern Hemisphere \n", + "3 4 Canada North America Northern Hemisphere \n", + "4 5 Thailand Asia Northern Hemisphere \n", + "\n", + " Heart Attack Risk \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 26 columns]" + ] }, + "execution_count": 214, "metadata": {}, - "execution_count": 214 + "output_type": "execute_result" } + ], + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" ] }, { "cell_type": "code", - "source": [ - "heart_data.tail()\n" - ], + "execution_count": 215, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -519,49 +503,12 @@ "id": "NQwDjwwGeBF4", "outputId": "60d69d34-5c6e-4975-c633-13cc786065f6" }, - "execution_count": 215, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", - "8758 MSV9918 60 Male 121 94/76 61 \n", - "8759 QSV6764 28 Female 120 157/102 73 \n", - "8760 XKA5925 47 Male 250 161/75 105 \n", - "8761 EPE6801 36 Male 178 119/67 60 \n", - "8762 ZWN9666 25 Female 356 138/67 75 \n", - "\n", - " Diabetes Family History Smoking Obesity ... \\\n", - "8758 1 1 1 0 ... \n", - "8759 1 0 0 1 ... \n", - "8760 0 1 1 1 ... \n", - "8761 1 0 1 0 ... \n", - "8762 1 1 0 0 ... \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", - "8758 7 7 Thailand \n", - "8759 4 9 Canada \n", - "8760 4 4 Brazil \n", - "8761 2 8 Brazil \n", - "8762 7 4 United Kingdom \n", - "\n", - " Continent Hemisphere Heart Attack Risk \n", - "8758 Asia Northern Hemisphere 0 \n", - "8759 North America Northern Hemisphere 0 \n", - "8760 South America Southern Hemisphere 1 \n", - "8761 South America Southern Hemisphere 0 \n", - "8762 Europe Northern Hemisphere 1 \n", - "\n", - "[5 rows x 26 columns]" - ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe" + }, "text/html": [ "\n", "
\n", @@ -941,22 +888,57 @@ "
\n", " \n" ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe" - } + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", + "8758 MSV9918 60 Male 121 94/76 61 \n", + "8759 QSV6764 28 Female 120 157/102 73 \n", + "8760 XKA5925 47 Male 250 161/75 105 \n", + "8761 EPE6801 36 Male 178 119/67 60 \n", + "8762 ZWN9666 25 Female 356 138/67 75 \n", + "\n", + " Diabetes Family History Smoking Obesity ... \\\n", + "8758 1 1 1 0 ... \n", + "8759 1 0 0 1 ... \n", + "8760 0 1 1 1 ... \n", + "8761 1 0 1 0 ... \n", + "8762 1 1 0 0 ... \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", + "8758 7 7 Thailand \n", + "8759 4 9 Canada \n", + "8760 4 4 Brazil \n", + "8761 2 8 Brazil \n", + "8762 7 4 United Kingdom \n", + "\n", + " Continent Hemisphere Heart Attack Risk \n", + "8758 Asia Northern Hemisphere 0 \n", + "8759 North America Northern Hemisphere 0 \n", + "8760 South America Southern Hemisphere 1 \n", + "8761 South America Southern Hemisphere 0 \n", + "8762 Europe Northern Hemisphere 1 \n", + "\n", + "[5 rows x 26 columns]" + ] }, + "execution_count": 215, "metadata": {}, - "execution_count": 215 + "output_type": "execute_result" } + ], + "source": [ + "heart_data.tail()\n" ] }, { "cell_type": "code", - "source": [ - "# number of rows and columns in the dataset\n", - "heart_data.shape\n", - "\n" - ], + "execution_count": 216, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -964,26 +946,27 @@ "id": "Ye8LlTQVeHs1", "outputId": "e0fb1303-1000-45df-8cbf-49a3322f42b1" }, - "execution_count": 216, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "(8763, 26)" ] }, + "execution_count": 216, "metadata": {}, - "execution_count": 216 + "output_type": "execute_result" } + ], + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" ] }, { "cell_type": "code", - "source": [ - "# getting some info about the data\n", - "heart_data.info()" - ], + "execution_count": 217, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -991,11 +974,10 @@ "id": "5YZLMOwFeXF3", "outputId": "4a322f74-c893-45a0-e4e2-cca32e4d0bd3" }, - "execution_count": 217, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\n", "RangeIndex: 8763 entries, 0 to 8762\n", @@ -1032,14 +1014,15 @@ "memory usage: 1.7+ MB\n" ] } + ], + "source": [ + "# getting some info about the data\n", + "heart_data.info()" ] }, { "cell_type": "code", - "source": [ - "#checking for missing values\n", - "heart_data.isnull().sum()" - ], + "execution_count": 218, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1047,10 +1030,8 @@ "id": "QHazm2rze6Oj", "outputId": "1601115a-a97a-4aa7-b0ca-1d94d09d11fa" }, - "execution_count": 218, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "Patient ID 0\n", @@ -1082,29 +1063,31 @@ "dtype: int64" ] }, + "execution_count": 218, "metadata": {}, - "execution_count": 218 + "output_type": "execute_result" } - ] + ], + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ] }, { "cell_type": "code", - "source": [ - "# statistical measures about the data\n", - "z=heart_data.describe()" - ], + "execution_count": 219, "metadata": { "id": "nt15bvuYfBcA" }, - "execution_count": 219, - "outputs": [] + "outputs": [], + "source": [ + "# statistical measures about the data\n", + "z=heart_data.describe()" + ] }, { "cell_type": "code", - "source": [ - "# checking the distribution of Target Variable\n", - "heart_data['Heart Attack Risk'].value_counts()" - ], + "execution_count": 220, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1112,10 +1095,8 @@ "id": "NCbxYqqNf2-4", "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0" }, - "execution_count": 220, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "Heart Attack Risk\n", @@ -1124,59 +1105,62 @@ "Name: count, dtype: int64" ] }, + "execution_count": 220, "metadata": {}, - "execution_count": 220 + "output_type": "execute_result" } + ], + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['Heart Attack Risk'].value_counts()" ] }, { "cell_type": "markdown", - "source": [], "metadata": { "id": "DvvKtsuILgK1" - } + }, + "source": [] }, { "cell_type": "code", - "source": [ - "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" - ], + "execution_count": 221, "metadata": { "id": "mfiZ3MDvIiaV" }, - "execution_count": 221, - "outputs": [] + "outputs": [], + "source": [ + "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "qWNMUL5_CrfC" + }, "source": [ "1-->Defective heart\n", "\n", "0-->Healthy heart\n", "\n", "\n" - ], - "metadata": { - "id": "qWNMUL5_CrfC" - } + ] }, { "cell_type": "code", - "source": [ - "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", - "y=heart_data_num['Heart Attack Risk']\n" - ], + "execution_count": 222, "metadata": { "id": "oSgKSF5-DGVk" }, - "execution_count": 222, - "outputs": [] + "outputs": [], + "source": [ + "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", + "y=heart_data_num['Heart Attack Risk']\n" + ] }, { "cell_type": "code", - "source": [ - "print(x)" - ], + "execution_count": 223, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1184,11 +1168,10 @@ "id": "zhiIhyMxDhWF", "outputId": "5684dc44-c814-4d81-e438-ecc544010d10" }, - "execution_count": 223, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", "0 67 208 72 0 0 1 \n", @@ -1258,13 +1241,14 @@ "[8763 rows x 18 columns]\n" ] } + ], + "source": [ + "print(x)" ] }, { "cell_type": "code", - "source": [ - "print(y)" - ], + "execution_count": 224, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1272,11 +1256,10 @@ "id": "VpOvdXWgHWmI", "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4" }, - "execution_count": 224, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "0 0\n", "1 0\n", @@ -1292,43 +1275,35 @@ "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" ] } + ], + "source": [ + "print(y)" ] }, { "cell_type": "markdown", - "source": [ - "Splitting data into Training data" - ], "metadata": { "id": "5A20XMHYII3T" - } + }, + "source": [ + "Splitting data into Training data" + ] }, { "cell_type": "code", - "source": [ - "\n", - "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" - ], + "execution_count": 225, "metadata": { "id": "SNK4hm8DIPSm" }, - "execution_count": 225, - "outputs": [] - }, - { + "outputs": [], "source": [ - "# Check the number of samples in x and y\n", - "print(f\"Number of samples in x: {len(x)}\")\n", - "print(f\"Number of samples in y: {len(y)}\")\n", "\n", - "# If the number of samples is different, raise an error\n", - "if len(x) != len(y):\n", - " raise ValueError(\"Input arrays have different number of samples.\")\n", - "\n", - "# Proceed with train_test_split\n", - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" - ], + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ] + }, + { "cell_type": "code", + "execution_count": 226, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1336,251 +1311,276 @@ "id": "GyZ6mljEHuVk", "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f" }, - "execution_count": 226, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Number of samples in x: 8763\n", "Number of samples in y: 8763\n" ] } + ], + "source": [ + "# Check the number of samples in x and y\n", + "print(f\"Number of samples in x: {len(x)}\")\n", + "print(f\"Number of samples in y: {len(y)}\")\n", + "\n", + "# If the number of samples is different, raise an error\n", + "if len(x) != len(y):\n", + " raise ValueError(\"Input arrays have different number of samples.\")\n", + "\n", + "# Proceed with train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" ] }, { "cell_type": "code", - "source": [ - "print(x.shape,x_train.shape,x_test.shape)" - ], + "execution_count": 227, "metadata": { - "id": "7OTKtdA-JLCV", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "7OTKtdA-JLCV", "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287" }, - "execution_count": 227, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "(8763, 18) (7010, 18) (1753, 18)\n" ] } + ], + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" ] }, { "cell_type": "markdown", - "source": [ - "MODEL TRAINING" - ], "metadata": { "id": "ne2RibQaJdNe" - } + }, + "source": [ + "MODEL TRAINING" + ] }, { "cell_type": "markdown", - "source": [ - "LOGISTIC REGRESSION" - ], "metadata": { "id": "AUEblGtLJlzD" - } + }, + "source": [ + "LOGISTIC REGRESSION" + ] }, { "cell_type": "code", - "source": [], + "execution_count": 227, "metadata": { "id": "OVQDdrIpHm8P" }, - "execution_count": 227, - "outputs": [] + "outputs": [], + "source": [] }, { "cell_type": "code", - "source": [ - "model1=LogisticRegression()" - ], + "execution_count": 228, "metadata": { "id": "w0CnNIPkHnTT" }, - "execution_count": 228, - "outputs": [] + "outputs": [], + "source": [ + "model1=LogisticRegression()" + ] }, { "cell_type": "code", - "source": [ - "# Training the logistic regression model with training data\n", - "model1.fit(x_train,y_train)" - ], + "execution_count": 229, "metadata": { - "id": "kr84EwGwHqGY", "colab": { "base_uri": "https://localhost:8080/", "height": 74 }, + "id": "kr84EwGwHqGY", "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593" }, - "execution_count": 229, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - "LogisticRegression()" - ], "text/html": [ "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression()" ] }, + "execution_count": 229, "metadata": {}, - "execution_count": 229 + "output_type": "execute_result" } + ], + "source": [ + "# Training the logistic regression model with training data\n", + "model1.fit(x_train,y_train)" ] }, { "cell_type": "markdown", + "metadata": { + "id": "aPahD6MLKaPU" + }, "source": [ "Model Evaluation\n", "\n", "Accuracy Score" - ], - "metadata": { - "id": "aPahD6MLKaPU" - } + ] }, { "cell_type": "markdown", - "source": [], "metadata": { "id": "FE92kQzZHIIl" - } + }, + "source": [] }, { "cell_type": "code", + "execution_count": 230, + "metadata": { + "id": "NHy61zdJKDR1" + }, + "outputs": [], "source": [ "#accuracy on training data\n", "x_train_prediction = model1.predict(x_train)\n", "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" - ], - "metadata": { - "id": "NHy61zdJKDR1" - }, - "execution_count": 230, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "print('Accuracy on Training data:',training_data_accuracy)" - ], + "execution_count": 231, "metadata": { - "id": "J4XiNRwXLCXf", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "J4XiNRwXLCXf", "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1" }, - "execution_count": 231, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Accuracy on Training data: 0.6417974322396577\n" ] } + ], + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" ] }, { "cell_type": "code", + "execution_count": 232, + "metadata": { + "id": "ehbFgWjhLK44" + }, + "outputs": [], "source": [ "#accuracy on test data\n", "x_test_prediction=model1.predict(x_test)\n", "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" - ], - "metadata": { - "id": "ehbFgWjhLK44" - }, - "execution_count": 232, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "print('Accuracy on Testing data:',testing_data_accuracy)" - ], + "execution_count": 233, "metadata": { - "id": "jYZIcbiVLs0G", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "jYZIcbiVLs0G", "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0" }, - "execution_count": 233, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Accuracy on Testing data: 0.6417569880205363\n" ] } + ], + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" ] }, { "cell_type": "markdown", - "source": [ - "BUILDING PREDICTING SYSTEM" - ], "metadata": { "id": "rec6Gz8vMP_G" - } + }, + "source": [ + "BUILDING PREDICTING SYSTEM" + ] }, { "cell_type": "code", - "source": [ - "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model1.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")\n", - "\n" - ], + "execution_count": 234, "metadata": { - "id": "Ky2mzQUgL9IU", "colab": { "base_uri": "https://localhost:8080/" }, + "id": "Ky2mzQUgL9IU", "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f" }, - "execution_count": 234, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "[0]\n", "The person does not have heart disease\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] } + ], + "source": [ + "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model1.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" ] } - ] -} \ No newline at end of file + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From b837d199a908bd872571838cb5807b1606723eb0 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 10:58:47 +0530 Subject: [PATCH 10/12] Created using Colab --- Copy_of_Heart_Disease_Prediction.ipynb | 1598 ++++++++++++++++++++++++ 1 file changed, 1598 insertions(+) create mode 100644 Copy_of_Heart_Disease_Prediction.ipynb diff --git a/Copy_of_Heart_Disease_Prediction.ipynb b/Copy_of_Heart_Disease_Prediction.ipynb new file mode 100644 index 00000000..2220c52e --- /dev/null +++ b/Copy_of_Heart_Disease_Prediction.ipynb @@ -0,0 +1,1598 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Importing the Dependencies\n" + ], + "metadata": { + "id": "Cj2SOXgaZt-Q" + } + }, + { + "cell_type": "code", + "source": [ + "# @title\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score" + ], + "metadata": { + "id": "k850UGz1Z03B" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Data Collection and Processing\n" + ], + "metadata": { + "id": "APYsimt8bDoD" + } + }, + { + "cell_type": "code", + "source": [ + "#loading the csv data to a Pandas DataFrame\n", + "heart_data= pd.read_csv('/content/heart.csv')" + ], + "metadata": { + "id": "RJg3aA91Z0-u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#print first 5 rows of the datase\n", + "heart_data.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 394 + }, + "id": "BnoQ8u4hdZ8Z", + "outputId": "34f7ab52-db6f-4fd8-ad46-3317765efd4d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", + "0 BMW7812 67 Male 208 158/88 72 0 \n", + "1 CZE1114 21 Male 389 165/93 98 1 \n", + "2 BNI9906 21 Female 324 174/99 72 1 \n", + "3 JLN3497 84 Male 383 163/100 73 1 \n", + "4 GFO8847 66 Male 318 91/88 93 1 \n", + "\n", + " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", + "0 0 1 0 ... 6.615001 261404 \n", + "1 1 1 1 ... 4.963459 285768 \n", + "2 0 0 0 ... 9.463426 235282 \n", + "3 1 1 0 ... 7.648981 125640 \n", + "4 1 1 1 ... 1.514821 160555 \n", + "\n", + " BMI Triglycerides Physical Activity Days Per Week \\\n", + "0 31.251233 286 0 \n", + "1 27.194973 235 1 \n", + "2 28.176571 587 4 \n", + "3 36.464704 378 3 \n", + "4 21.809144 231 1 \n", + "\n", + " Sleep Hours Per Day Country Continent Hemisphere \\\n", + "0 6 Argentina South America Southern Hemisphere \n", + "1 7 Canada North America Northern Hemisphere \n", + "2 4 France Europe Northern Hemisphere \n", + "3 4 Canada North America Northern Hemisphere \n", + "4 5 Thailand Asia Northern Hemisphere \n", + "\n", + " Heart Attack Risk \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 26 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
0BMW781267Male208158/88720010...6.61500126140431.25123328606ArgentinaSouth AmericaSouthern Hemisphere0
1CZE111421Male389165/93981111...4.96345928576827.19497323517CanadaNorth AmericaNorthern Hemisphere0
2BNI990621Female324174/99721000...9.46342623528228.17657158744FranceEuropeNorthern Hemisphere0
3JLN349784Male383163/100731110...7.64898112564036.46470437834CanadaNorth AmericaNorthern Hemisphere0
4GFO884766Male31891/88931111...1.51482116055521.80914423115ThailandAsiaNorthern Hemisphere0
\n", + "

5 rows × 26 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "heart_data" + } + }, + "metadata": {}, + "execution_count": 238 + } + ] + }, + { + "cell_type": "code", + "source": [ + "heart_data.tail()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 394 + }, + "id": "NQwDjwwGeBF4", + "outputId": "5e223775-21bd-476f-9877-d798115cfba1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", + "8758 MSV9918 60 Male 121 94/76 61 \n", + "8759 QSV6764 28 Female 120 157/102 73 \n", + "8760 XKA5925 47 Male 250 161/75 105 \n", + "8761 EPE6801 36 Male 178 119/67 60 \n", + "8762 ZWN9666 25 Female 356 138/67 75 \n", + "\n", + " Diabetes Family History Smoking Obesity ... \\\n", + "8758 1 1 1 0 ... \n", + "8759 1 0 0 1 ... \n", + "8760 0 1 1 1 ... \n", + "8761 1 0 1 0 ... \n", + "8762 1 1 0 0 ... \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", + "8758 7 7 Thailand \n", + "8759 4 9 Canada \n", + "8760 4 4 Brazil \n", + "8761 2 8 Brazil \n", + "8762 7 4 United Kingdom \n", + "\n", + " Continent Hemisphere Heart Attack Risk \n", + "8758 Asia Northern Hemisphere 0 \n", + "8759 North America Northern Hemisphere 0 \n", + "8760 South America Southern Hemisphere 1 \n", + "8761 South America Southern Hemisphere 0 \n", + "8762 Europe Northern Hemisphere 1 \n", + "\n", + "[5 rows x 26 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
8758MSV991860Male12194/76611110...10.80637323542019.6558956777ThailandAsiaNorthern Hemisphere0
8759QSV676428Female120157/102731001...3.83303821788123.99386661749CanadaNorth AmericaNorthern Hemisphere0
8760XKA592547Male250161/751050111...2.3752143699835.40614652744BrazilSouth AmericaSouthern Hemisphere1
8761EPE680136Male178119/67601010...0.02910420994327.29402011428BrazilSouth AmericaSouthern Hemisphere0
8762ZWN966625Female356138/67751100...9.00523424733832.91415118074United KingdomEuropeNorthern Hemisphere1
\n", + "

5 rows × 26 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe" + } + }, + "metadata": {}, + "execution_count": 239 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# number of rows and columns in the dataset\n", + "heart_data.shape\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ye8LlTQVeHs1", + "outputId": "dfa1c74f-1d0b-41e3-ab38-be04e322cc3f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(8763, 26)" + ] + }, + "metadata": {}, + "execution_count": 240 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# getting some info about the data\n", + "heart_data.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5YZLMOwFeXF3", + "outputId": "49505b06-cf79-4cd9-d596-1c770ee19201" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 8763 entries, 0 to 8762\n", + "Data columns (total 26 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Patient ID 8763 non-null object \n", + " 1 Age 8763 non-null int64 \n", + " 2 Sex 8763 non-null object \n", + " 3 Cholesterol 8763 non-null int64 \n", + " 4 Blood Pressure 8763 non-null object \n", + " 5 Heart Rate 8763 non-null int64 \n", + " 6 Diabetes 8763 non-null int64 \n", + " 7 Family History 8763 non-null int64 \n", + " 8 Smoking 8763 non-null int64 \n", + " 9 Obesity 8763 non-null int64 \n", + " 10 Alcohol Consumption 8763 non-null int64 \n", + " 11 Exercise Hours Per Week 8763 non-null float64\n", + " 12 Diet 8763 non-null object \n", + " 13 Previous Heart Problems 8763 non-null int64 \n", + " 14 Medication Use 8763 non-null int64 \n", + " 15 Stress Level 8763 non-null int64 \n", + " 16 Sedentary Hours Per Day 8763 non-null float64\n", + " 17 Income 8763 non-null int64 \n", + " 18 BMI 8763 non-null float64\n", + " 19 Triglycerides 8763 non-null int64 \n", + " 20 Physical Activity Days Per Week 8763 non-null int64 \n", + " 21 Sleep Hours Per Day 8763 non-null int64 \n", + " 22 Country 8763 non-null object \n", + " 23 Continent 8763 non-null object \n", + " 24 Hemisphere 8763 non-null object \n", + " 25 Heart Attack Risk 8763 non-null int64 \n", + "dtypes: float64(3), int64(16), object(7)\n", + "memory usage: 1.7+ MB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#checking for missing values\n", + "heart_data.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QHazm2rze6Oj", + "outputId": "6279a525-3c1f-488a-9c74-03e70fb60e2b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Patient ID 0\n", + "Age 0\n", + "Sex 0\n", + "Cholesterol 0\n", + "Blood Pressure 0\n", + "Heart Rate 0\n", + "Diabetes 0\n", + "Family History 0\n", + "Smoking 0\n", + "Obesity 0\n", + "Alcohol Consumption 0\n", + "Exercise Hours Per Week 0\n", + "Diet 0\n", + "Previous Heart Problems 0\n", + "Medication Use 0\n", + "Stress Level 0\n", + "Sedentary Hours Per Day 0\n", + "Income 0\n", + "BMI 0\n", + "Triglycerides 0\n", + "Physical Activity Days Per Week 0\n", + "Sleep Hours Per Day 0\n", + "Country 0\n", + "Continent 0\n", + "Hemisphere 0\n", + "Heart Attack Risk 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 242 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# statistical measures about the data\n", + "z=heart_data.describe()" + ], + "metadata": { + "id": "nt15bvuYfBcA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# checking the distribution of Target Variable\n", + "heart_data['Heart Attack Risk'].value_counts()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NCbxYqqNf2-4", + "outputId": "3c17e4f1-7b2c-4250-df19-5ee460d97462" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Heart Attack Risk\n", + "0 5624\n", + "1 3139\n", + "Name: count, dtype: int64" + ] + }, + "metadata": {}, + "execution_count": 244 + } + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "DvvKtsuILgK1" + } + }, + { + "cell_type": "code", + "source": [ + "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" + ], + "metadata": { + "id": "mfiZ3MDvIiaV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "1-->Defective heart\n", + "\n", + "0-->Healthy heart\n", + "\n", + "\n" + ], + "metadata": { + "id": "qWNMUL5_CrfC" + } + }, + { + "cell_type": "code", + "source": [ + "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", + "y=heart_data_num['Heart Attack Risk']\n" + ], + "metadata": { + "id": "oSgKSF5-DGVk" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(x)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zhiIhyMxDhWF", + "outputId": "89ec1b1c-7e9a-41e3-c70d-0705c6be5550" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", + "0 67 208 72 0 0 1 \n", + "1 21 389 98 1 1 1 \n", + "2 21 324 72 1 0 0 \n", + "3 84 383 73 1 1 1 \n", + "4 66 318 93 1 1 1 \n", + "... ... ... ... ... ... ... \n", + "8758 60 121 61 1 1 1 \n", + "8759 28 120 73 1 0 0 \n", + "8760 47 250 105 0 1 1 \n", + "8761 36 178 60 1 0 1 \n", + "8762 25 356 75 1 1 0 \n", + "\n", + " Obesity Alcohol Consumption Exercise Hours Per Week \\\n", + "0 0 0 4.168189 \n", + "1 1 1 1.813242 \n", + "2 0 0 2.078353 \n", + "3 0 1 9.828130 \n", + "4 1 0 5.804299 \n", + "... ... ... ... \n", + "8758 0 1 7.917342 \n", + "8759 1 0 16.558426 \n", + "8760 1 1 3.148438 \n", + "8761 0 0 3.789950 \n", + "8762 0 1 18.081748 \n", + "\n", + " Previous Heart Problems Medication Use Stress Level \\\n", + "0 0 0 9 \n", + "1 1 0 1 \n", + "2 1 1 9 \n", + "3 1 0 9 \n", + "4 1 0 6 \n", + "... ... ... ... \n", + "8758 1 1 8 \n", + "8759 0 0 8 \n", + "8760 1 0 5 \n", + "8761 1 1 5 \n", + "8762 0 0 8 \n", + "\n", + " Sedentary Hours Per Day Income BMI Triglycerides \\\n", + "0 6.615001 261404 31.251233 286 \n", + "1 4.963459 285768 27.194973 235 \n", + "2 9.463426 235282 28.176571 587 \n", + "3 7.648981 125640 36.464704 378 \n", + "4 1.514821 160555 21.809144 231 \n", + "... ... ... ... ... \n", + "8758 10.806373 235420 19.655895 67 \n", + "8759 3.833038 217881 23.993866 617 \n", + "8760 2.375214 36998 35.406146 527 \n", + "8761 0.029104 209943 27.294020 114 \n", + "8762 9.005234 247338 32.914151 180 \n", + "\n", + " Physical Activity Days Per Week Sleep Hours Per Day \n", + "0 0 6 \n", + "1 1 7 \n", + "2 4 4 \n", + "3 3 4 \n", + "4 1 5 \n", + "... ... ... \n", + "8758 7 7 \n", + "8759 4 9 \n", + "8760 4 4 \n", + "8761 2 8 \n", + "8762 7 4 \n", + "\n", + "[8763 rows x 18 columns]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VpOvdXWgHWmI", + "outputId": "0fe8d553-03e9-43ae-d516-a53b3d4882f6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "8758 0\n", + "8759 0\n", + "8760 1\n", + "8761 0\n", + "8762 1\n", + "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Splitting data into Training data" + ], + "metadata": { + "id": "5A20XMHYII3T" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" + ], + "metadata": { + "id": "SNK4hm8DIPSm" + }, + "execution_count": null, + "outputs": [] + }, + { + "source": [ + "# Check the number of samples in x and y\n", + "print(f\"Number of samples in x: {len(x)}\")\n", + "print(f\"Number of samples in y: {len(y)}\")\n", + "\n", + "# If the number of samples is different, raise an error\n", + "if len(x) != len(y):\n", + " raise ValueError(\"Input arrays have different number of samples.\")\n", + "\n", + "# Proceed with train_test_split\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" + ], + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GyZ6mljEHuVk", + "outputId": "04a4ce50-fb5c-40c4-ff62-df8d9421d9bc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of samples in x: 8763\n", + "Number of samples in y: 8763\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(x.shape,x_train.shape,x_test.shape)" + ], + "metadata": { + "id": "7OTKtdA-JLCV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2076607f-b25d-4c24-87ea-ed78fd8fa16a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(8763, 18) (7010, 18) (1753, 18)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "MODEL TRAINING" + ], + "metadata": { + "id": "ne2RibQaJdNe" + } + }, + { + "cell_type": "markdown", + "source": [ + "LOGISTIC REGRESSION" + ], + "metadata": { + "id": "AUEblGtLJlzD" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "OVQDdrIpHm8P" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model1=LogisticRegression()" + ], + "metadata": { + "id": "w0CnNIPkHnTT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training the logistic regression model with training data\n", + "model1.fit(x_train,y_train)" + ], + "metadata": { + "id": "kr84EwGwHqGY", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "outputId": "b95bdf48-6838-4805-9236-d18a5c8604cc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression()" + ], + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 253 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Model Evaluation\n", + "\n", + "Accuracy Score" + ], + "metadata": { + "id": "aPahD6MLKaPU" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "FE92kQzZHIIl" + } + }, + { + "cell_type": "code", + "source": [ + "#accuracy on training data\n", + "x_train_prediction = model1.predict(x_train)\n", + "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" + ], + "metadata": { + "id": "NHy61zdJKDR1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Training data:',training_data_accuracy)" + ], + "metadata": { + "id": "J4XiNRwXLCXf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5cfed6e8-1d32-4813-8855-729dc9b94737" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Training data: 0.6417974322396577\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#accuracy on test data\n", + "x_test_prediction=model1.predict(x_test)\n", + "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" + ], + "metadata": { + "id": "ehbFgWjhLK44" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Accuracy on Testing data:',testing_data_accuracy)" + ], + "metadata": { + "id": "jYZIcbiVLs0G", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9915891c-189f-490a-f3b5-b73ad1793fb3" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy on Testing data: 0.6417569880205363\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "BUILDING PREDICTING SYSTEM" + ], + "metadata": { + "id": "rec6Gz8vMP_G" + } + }, + { + "cell_type": "code", + "source": [ + "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", + "# change the input data into numpy array\n", + "input_data_as_numpy_array=np.asarray(input_data)\n", + "#reshape the numpy array as we are predicting for only on instance\n", + "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", + "prediction=model1.predict(input_data_reshaped)\n", + "print(prediction)\n", + "if (prediction[0]==0):\n", + " print(\"The person does not have heart disease\")\n", + "else:\n", + " print(\"the person has heart disease\")\n", + "\n" + ], + "metadata": { + "id": "Ky2mzQUgL9IU", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "68bd337e-c1ad-4926-b4de-bfdd74639b01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0]\n", + "The person does not have heart disease\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ] + } + ] +} \ No newline at end of file From 754ddef94e466ce2afd010c137f7addfe9268462 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 13:37:04 +0530 Subject: [PATCH 11/12] Delete Copy_of_Heart_Disease_Prediction.ipynb --- Copy_of_Heart_Disease_Prediction.ipynb | 1598 ------------------------ 1 file changed, 1598 deletions(-) delete mode 100644 Copy_of_Heart_Disease_Prediction.ipynb diff --git a/Copy_of_Heart_Disease_Prediction.ipynb b/Copy_of_Heart_Disease_Prediction.ipynb deleted file mode 100644 index 2220c52e..00000000 --- a/Copy_of_Heart_Disease_Prediction.ipynb +++ /dev/null @@ -1,1598 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyObVXmU5pb8i7Cea2P9aquf", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Importing the Dependencies\n" - ], - "metadata": { - "id": "Cj2SOXgaZt-Q" - } - }, - { - "cell_type": "code", - "source": [ - "# @title\n", - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import accuracy_score" - ], - "metadata": { - "id": "k850UGz1Z03B" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Data Collection and Processing\n" - ], - "metadata": { - "id": "APYsimt8bDoD" - } - }, - { - "cell_type": "code", - "source": [ - "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart.csv')" - ], - "metadata": { - "id": "RJg3aA91Z0-u" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#print first 5 rows of the datase\n", - "heart_data.head()\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 394 - }, - "id": "BnoQ8u4hdZ8Z", - "outputId": "34f7ab52-db6f-4fd8-ad46-3317765efd4d" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", - "0 BMW7812 67 Male 208 158/88 72 0 \n", - "1 CZE1114 21 Male 389 165/93 98 1 \n", - "2 BNI9906 21 Female 324 174/99 72 1 \n", - "3 JLN3497 84 Male 383 163/100 73 1 \n", - "4 GFO8847 66 Male 318 91/88 93 1 \n", - "\n", - " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", - "0 0 1 0 ... 6.615001 261404 \n", - "1 1 1 1 ... 4.963459 285768 \n", - "2 0 0 0 ... 9.463426 235282 \n", - "3 1 1 0 ... 7.648981 125640 \n", - "4 1 1 1 ... 1.514821 160555 \n", - "\n", - " BMI Triglycerides Physical Activity Days Per Week \\\n", - "0 31.251233 286 0 \n", - "1 27.194973 235 1 \n", - "2 28.176571 587 4 \n", - "3 36.464704 378 3 \n", - "4 21.809144 231 1 \n", - "\n", - " Sleep Hours Per Day Country Continent Hemisphere \\\n", - "0 6 Argentina South America Southern Hemisphere \n", - "1 7 Canada North America Northern Hemisphere \n", - "2 4 France Europe Northern Hemisphere \n", - "3 4 Canada North America Northern Hemisphere \n", - "4 5 Thailand Asia Northern Hemisphere \n", - "\n", - " Heart Attack Risk \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "\n", - "[5 rows x 26 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
0BMW781267Male208158/88720010...6.61500126140431.25123328606ArgentinaSouth AmericaSouthern Hemisphere0
1CZE111421Male389165/93981111...4.96345928576827.19497323517CanadaNorth AmericaNorthern Hemisphere0
2BNI990621Female324174/99721000...9.46342623528228.17657158744FranceEuropeNorthern Hemisphere0
3JLN349784Male383163/100731110...7.64898112564036.46470437834CanadaNorth AmericaNorthern Hemisphere0
4GFO884766Male31891/88931111...1.51482116055521.80914423115ThailandAsiaNorthern Hemisphere0
\n", - "

5 rows × 26 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "heart_data" - } - }, - "metadata": {}, - "execution_count": 238 - } - ] - }, - { - "cell_type": "code", - "source": [ - "heart_data.tail()\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 394 - }, - "id": "NQwDjwwGeBF4", - "outputId": "5e223775-21bd-476f-9877-d798115cfba1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", - "8758 MSV9918 60 Male 121 94/76 61 \n", - "8759 QSV6764 28 Female 120 157/102 73 \n", - "8760 XKA5925 47 Male 250 161/75 105 \n", - "8761 EPE6801 36 Male 178 119/67 60 \n", - "8762 ZWN9666 25 Female 356 138/67 75 \n", - "\n", - " Diabetes Family History Smoking Obesity ... \\\n", - "8758 1 1 1 0 ... \n", - "8759 1 0 0 1 ... \n", - "8760 0 1 1 1 ... \n", - "8761 1 0 1 0 ... \n", - "8762 1 1 0 0 ... \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", - "8758 7 7 Thailand \n", - "8759 4 9 Canada \n", - "8760 4 4 Brazil \n", - "8761 2 8 Brazil \n", - "8762 7 4 United Kingdom \n", - "\n", - " Continent Hemisphere Heart Attack Risk \n", - "8758 Asia Northern Hemisphere 0 \n", - "8759 North America Northern Hemisphere 0 \n", - "8760 South America Southern Hemisphere 1 \n", - "8761 South America Southern Hemisphere 0 \n", - "8762 Europe Northern Hemisphere 1 \n", - "\n", - "[5 rows x 26 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
8758MSV991860Male12194/76611110...10.80637323542019.6558956777ThailandAsiaNorthern Hemisphere0
8759QSV676428Female120157/102731001...3.83303821788123.99386661749CanadaNorth AmericaNorthern Hemisphere0
8760XKA592547Male250161/751050111...2.3752143699835.40614652744BrazilSouth AmericaSouthern Hemisphere1
8761EPE680136Male178119/67601010...0.02910420994327.29402011428BrazilSouth AmericaSouthern Hemisphere0
8762ZWN966625Female356138/67751100...9.00523424733832.91415118074United KingdomEuropeNorthern Hemisphere1
\n", - "

5 rows × 26 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe" - } - }, - "metadata": {}, - "execution_count": 239 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# number of rows and columns in the dataset\n", - "heart_data.shape\n", - "\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ye8LlTQVeHs1", - "outputId": "dfa1c74f-1d0b-41e3-ab38-be04e322cc3f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(8763, 26)" - ] - }, - "metadata": {}, - "execution_count": 240 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# getting some info about the data\n", - "heart_data.info()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5YZLMOwFeXF3", - "outputId": "49505b06-cf79-4cd9-d596-1c770ee19201" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "RangeIndex: 8763 entries, 0 to 8762\n", - "Data columns (total 26 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Patient ID 8763 non-null object \n", - " 1 Age 8763 non-null int64 \n", - " 2 Sex 8763 non-null object \n", - " 3 Cholesterol 8763 non-null int64 \n", - " 4 Blood Pressure 8763 non-null object \n", - " 5 Heart Rate 8763 non-null int64 \n", - " 6 Diabetes 8763 non-null int64 \n", - " 7 Family History 8763 non-null int64 \n", - " 8 Smoking 8763 non-null int64 \n", - " 9 Obesity 8763 non-null int64 \n", - " 10 Alcohol Consumption 8763 non-null int64 \n", - " 11 Exercise Hours Per Week 8763 non-null float64\n", - " 12 Diet 8763 non-null object \n", - " 13 Previous Heart Problems 8763 non-null int64 \n", - " 14 Medication Use 8763 non-null int64 \n", - " 15 Stress Level 8763 non-null int64 \n", - " 16 Sedentary Hours Per Day 8763 non-null float64\n", - " 17 Income 8763 non-null int64 \n", - " 18 BMI 8763 non-null float64\n", - " 19 Triglycerides 8763 non-null int64 \n", - " 20 Physical Activity Days Per Week 8763 non-null int64 \n", - " 21 Sleep Hours Per Day 8763 non-null int64 \n", - " 22 Country 8763 non-null object \n", - " 23 Continent 8763 non-null object \n", - " 24 Hemisphere 8763 non-null object \n", - " 25 Heart Attack Risk 8763 non-null int64 \n", - "dtypes: float64(3), int64(16), object(7)\n", - "memory usage: 1.7+ MB\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "#checking for missing values\n", - "heart_data.isnull().sum()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QHazm2rze6Oj", - "outputId": "6279a525-3c1f-488a-9c74-03e70fb60e2b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Patient ID 0\n", - "Age 0\n", - "Sex 0\n", - "Cholesterol 0\n", - "Blood Pressure 0\n", - "Heart Rate 0\n", - "Diabetes 0\n", - "Family History 0\n", - "Smoking 0\n", - "Obesity 0\n", - "Alcohol Consumption 0\n", - "Exercise Hours Per Week 0\n", - "Diet 0\n", - "Previous Heart Problems 0\n", - "Medication Use 0\n", - "Stress Level 0\n", - "Sedentary Hours Per Day 0\n", - "Income 0\n", - "BMI 0\n", - "Triglycerides 0\n", - "Physical Activity Days Per Week 0\n", - "Sleep Hours Per Day 0\n", - "Country 0\n", - "Continent 0\n", - "Hemisphere 0\n", - "Heart Attack Risk 0\n", - "dtype: int64" - ] - }, - "metadata": {}, - "execution_count": 242 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# statistical measures about the data\n", - "z=heart_data.describe()" - ], - "metadata": { - "id": "nt15bvuYfBcA" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# checking the distribution of Target Variable\n", - "heart_data['Heart Attack Risk'].value_counts()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NCbxYqqNf2-4", - "outputId": "3c17e4f1-7b2c-4250-df19-5ee460d97462" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "Heart Attack Risk\n", - "0 5624\n", - "1 3139\n", - "Name: count, dtype: int64" - ] - }, - "metadata": {}, - "execution_count": 244 - } - ] - }, - { - "cell_type": "markdown", - "source": [], - "metadata": { - "id": "DvvKtsuILgK1" - } - }, - { - "cell_type": "code", - "source": [ - "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" - ], - "metadata": { - "id": "mfiZ3MDvIiaV" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "1-->Defective heart\n", - "\n", - "0-->Healthy heart\n", - "\n", - "\n" - ], - "metadata": { - "id": "qWNMUL5_CrfC" - } - }, - { - "cell_type": "code", - "source": [ - "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", - "y=heart_data_num['Heart Attack Risk']\n" - ], - "metadata": { - "id": "oSgKSF5-DGVk" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print(x)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zhiIhyMxDhWF", - "outputId": "89ec1b1c-7e9a-41e3-c70d-0705c6be5550" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", - "0 67 208 72 0 0 1 \n", - "1 21 389 98 1 1 1 \n", - "2 21 324 72 1 0 0 \n", - "3 84 383 73 1 1 1 \n", - "4 66 318 93 1 1 1 \n", - "... ... ... ... ... ... ... \n", - "8758 60 121 61 1 1 1 \n", - "8759 28 120 73 1 0 0 \n", - "8760 47 250 105 0 1 1 \n", - "8761 36 178 60 1 0 1 \n", - "8762 25 356 75 1 1 0 \n", - "\n", - " Obesity Alcohol Consumption Exercise Hours Per Week \\\n", - "0 0 0 4.168189 \n", - "1 1 1 1.813242 \n", - "2 0 0 2.078353 \n", - "3 0 1 9.828130 \n", - "4 1 0 5.804299 \n", - "... ... ... ... \n", - "8758 0 1 7.917342 \n", - "8759 1 0 16.558426 \n", - "8760 1 1 3.148438 \n", - "8761 0 0 3.789950 \n", - "8762 0 1 18.081748 \n", - "\n", - " Previous Heart Problems Medication Use Stress Level \\\n", - "0 0 0 9 \n", - "1 1 0 1 \n", - "2 1 1 9 \n", - "3 1 0 9 \n", - "4 1 0 6 \n", - "... ... ... ... \n", - "8758 1 1 8 \n", - "8759 0 0 8 \n", - "8760 1 0 5 \n", - "8761 1 1 5 \n", - "8762 0 0 8 \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "0 6.615001 261404 31.251233 286 \n", - "1 4.963459 285768 27.194973 235 \n", - "2 9.463426 235282 28.176571 587 \n", - "3 7.648981 125640 36.464704 378 \n", - "4 1.514821 160555 21.809144 231 \n", - "... ... ... ... ... \n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day \n", - "0 0 6 \n", - "1 1 7 \n", - "2 4 4 \n", - "3 3 4 \n", - "4 1 5 \n", - "... ... ... \n", - "8758 7 7 \n", - "8759 4 9 \n", - "8760 4 4 \n", - "8761 2 8 \n", - "8762 7 4 \n", - "\n", - "[8763 rows x 18 columns]\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(y)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VpOvdXWgHWmI", - "outputId": "0fe8d553-03e9-43ae-d516-a53b3d4882f6" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "0 0\n", - "1 0\n", - "2 0\n", - "3 0\n", - "4 0\n", - " ..\n", - "8758 0\n", - "8759 0\n", - "8760 1\n", - "8761 0\n", - "8762 1\n", - "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Splitting data into Training data" - ], - "metadata": { - "id": "5A20XMHYII3T" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" - ], - "metadata": { - "id": "SNK4hm8DIPSm" - }, - "execution_count": null, - "outputs": [] - }, - { - "source": [ - "# Check the number of samples in x and y\n", - "print(f\"Number of samples in x: {len(x)}\")\n", - "print(f\"Number of samples in y: {len(y)}\")\n", - "\n", - "# If the number of samples is different, raise an error\n", - "if len(x) != len(y):\n", - " raise ValueError(\"Input arrays have different number of samples.\")\n", - "\n", - "# Proceed with train_test_split\n", - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" - ], - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GyZ6mljEHuVk", - "outputId": "04a4ce50-fb5c-40c4-ff62-df8d9421d9bc" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Number of samples in x: 8763\n", - "Number of samples in y: 8763\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(x.shape,x_train.shape,x_test.shape)" - ], - "metadata": { - "id": "7OTKtdA-JLCV", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "2076607f-b25d-4c24-87ea-ed78fd8fa16a" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(8763, 18) (7010, 18) (1753, 18)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "MODEL TRAINING" - ], - "metadata": { - "id": "ne2RibQaJdNe" - } - }, - { - "cell_type": "markdown", - "source": [ - "LOGISTIC REGRESSION" - ], - "metadata": { - "id": "AUEblGtLJlzD" - } - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "OVQDdrIpHm8P" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "model1=LogisticRegression()" - ], - "metadata": { - "id": "w0CnNIPkHnTT" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Training the logistic regression model with training data\n", - "model1.fit(x_train,y_train)" - ], - "metadata": { - "id": "kr84EwGwHqGY", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "outputId": "b95bdf48-6838-4805-9236-d18a5c8604cc" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "LogisticRegression()" - ], - "text/html": [ - "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ] - }, - "metadata": {}, - "execution_count": 253 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Model Evaluation\n", - "\n", - "Accuracy Score" - ], - "metadata": { - "id": "aPahD6MLKaPU" - } - }, - { - "cell_type": "markdown", - "source": [], - "metadata": { - "id": "FE92kQzZHIIl" - } - }, - { - "cell_type": "code", - "source": [ - "#accuracy on training data\n", - "x_train_prediction = model1.predict(x_train)\n", - "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" - ], - "metadata": { - "id": "NHy61zdJKDR1" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print('Accuracy on Training data:',training_data_accuracy)" - ], - "metadata": { - "id": "J4XiNRwXLCXf", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "5cfed6e8-1d32-4813-8855-729dc9b94737" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Accuracy on Training data: 0.6417974322396577\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "#accuracy on test data\n", - "x_test_prediction=model1.predict(x_test)\n", - "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" - ], - "metadata": { - "id": "ehbFgWjhLK44" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "print('Accuracy on Testing data:',testing_data_accuracy)" - ], - "metadata": { - "id": "jYZIcbiVLs0G", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "9915891c-189f-490a-f3b5-b73ad1793fb3" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Accuracy on Testing data: 0.6417569880205363\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "BUILDING PREDICTING SYSTEM" - ], - "metadata": { - "id": "rec6Gz8vMP_G" - } - }, - { - "cell_type": "code", - "source": [ - "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model1.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")\n", - "\n" - ], - "metadata": { - "id": "Ky2mzQUgL9IU", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "68bd337e-c1ad-4926-b4de-bfdd74639b01" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[0]\n", - "The person does not have heart disease\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ] - } - ] -} \ No newline at end of file From 4397ea603a5ed20dae25178b21cf526ae0a9e5f9 Mon Sep 17 00:00:00 2001 From: Sudiksha Thatipelli <163149118+Sudiksha18@users.noreply.github.com> Date: Mon, 13 May 2024 13:38:05 +0530 Subject: [PATCH 12/12] Delete Heart_Disease_Prediction (2).ipynb --- Heart_Disease_Prediction (2).ipynb | 1586 ---------------------------- 1 file changed, 1586 deletions(-) delete mode 100644 Heart_Disease_Prediction (2).ipynb diff --git a/Heart_Disease_Prediction (2).ipynb b/Heart_Disease_Prediction (2).ipynb deleted file mode 100644 index 314b9652..00000000 --- a/Heart_Disease_Prediction (2).ipynb +++ /dev/null @@ -1,1586 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Cj2SOXgaZt-Q" - }, - "source": [ - "Importing the Dependencies\n" - ] - }, - { - "cell_type": "code", - "execution_count": 212, - "metadata": { - "id": "k850UGz1Z03B" - }, - "outputs": [], - "source": [ - "# @title\n", - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import accuracy_score" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "APYsimt8bDoD" - }, - "source": [ - "Data Collection and Processing\n" - ] - }, - { - "cell_type": "code", - "execution_count": 213, - "metadata": { - "id": "RJg3aA91Z0-u" - }, - "outputs": [], - "source": [ - "#loading the csv data to a Pandas DataFrame\n", - "heart_data= pd.read_csv('/content/heart.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 214, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 394 - }, - "id": "BnoQ8u4hdZ8Z", - "outputId": "452a033e-92bd-4b1e-b754-93d896d0c0a7" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe", - "variable_name": "heart_data" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
0BMW781267Male208158/88720010...6.61500126140431.25123328606ArgentinaSouth AmericaSouthern Hemisphere0
1CZE111421Male389165/93981111...4.96345928576827.19497323517CanadaNorth AmericaNorthern Hemisphere0
2BNI990621Female324174/99721000...9.46342623528228.17657158744FranceEuropeNorthern Hemisphere0
3JLN349784Male383163/100731110...7.64898112564036.46470437834CanadaNorth AmericaNorthern Hemisphere0
4GFO884766Male31891/88931111...1.51482116055521.80914423115ThailandAsiaNorthern Hemisphere0
\n", - "

5 rows × 26 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate Diabetes \\\n", - "0 BMW7812 67 Male 208 158/88 72 0 \n", - "1 CZE1114 21 Male 389 165/93 98 1 \n", - "2 BNI9906 21 Female 324 174/99 72 1 \n", - "3 JLN3497 84 Male 383 163/100 73 1 \n", - "4 GFO8847 66 Male 318 91/88 93 1 \n", - "\n", - " Family History Smoking Obesity ... Sedentary Hours Per Day Income \\\n", - "0 0 1 0 ... 6.615001 261404 \n", - "1 1 1 1 ... 4.963459 285768 \n", - "2 0 0 0 ... 9.463426 235282 \n", - "3 1 1 0 ... 7.648981 125640 \n", - "4 1 1 1 ... 1.514821 160555 \n", - "\n", - " BMI Triglycerides Physical Activity Days Per Week \\\n", - "0 31.251233 286 0 \n", - "1 27.194973 235 1 \n", - "2 28.176571 587 4 \n", - "3 36.464704 378 3 \n", - "4 21.809144 231 1 \n", - "\n", - " Sleep Hours Per Day Country Continent Hemisphere \\\n", - "0 6 Argentina South America Southern Hemisphere \n", - "1 7 Canada North America Northern Hemisphere \n", - "2 4 France Europe Northern Hemisphere \n", - "3 4 Canada North America Northern Hemisphere \n", - "4 5 Thailand Asia Northern Hemisphere \n", - "\n", - " Heart Attack Risk \n", - "0 0 \n", - "1 0 \n", - "2 0 \n", - "3 0 \n", - "4 0 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 214, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#print first 5 rows of the datase\n", - "heart_data.head()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 215, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 394 - }, - "id": "NQwDjwwGeBF4", - "outputId": "60d69d34-5c6e-4975-c633-13cc786065f6" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "dataframe" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Patient IDAgeSexCholesterolBlood PressureHeart RateDiabetesFamily HistorySmokingObesity...Sedentary Hours Per DayIncomeBMITriglyceridesPhysical Activity Days Per WeekSleep Hours Per DayCountryContinentHemisphereHeart Attack Risk
8758MSV991860Male12194/76611110...10.80637323542019.6558956777ThailandAsiaNorthern Hemisphere0
8759QSV676428Female120157/102731001...3.83303821788123.99386661749CanadaNorth AmericaNorthern Hemisphere0
8760XKA592547Male250161/751050111...2.3752143699835.40614652744BrazilSouth AmericaSouthern Hemisphere1
8761EPE680136Male178119/67601010...0.02910420994327.29402011428BrazilSouth AmericaSouthern Hemisphere0
8762ZWN966625Female356138/67751100...9.00523424733832.91415118074United KingdomEuropeNorthern Hemisphere1
\n", - "

5 rows × 26 columns

\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Patient ID Age Sex Cholesterol Blood Pressure Heart Rate \\\n", - "8758 MSV9918 60 Male 121 94/76 61 \n", - "8759 QSV6764 28 Female 120 157/102 73 \n", - "8760 XKA5925 47 Male 250 161/75 105 \n", - "8761 EPE6801 36 Male 178 119/67 60 \n", - "8762 ZWN9666 25 Female 356 138/67 75 \n", - "\n", - " Diabetes Family History Smoking Obesity ... \\\n", - "8758 1 1 1 0 ... \n", - "8759 1 0 0 1 ... \n", - "8760 0 1 1 1 ... \n", - "8761 1 0 1 0 ... \n", - "8762 1 1 0 0 ... \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day Country \\\n", - "8758 7 7 Thailand \n", - "8759 4 9 Canada \n", - "8760 4 4 Brazil \n", - "8761 2 8 Brazil \n", - "8762 7 4 United Kingdom \n", - "\n", - " Continent Hemisphere Heart Attack Risk \n", - "8758 Asia Northern Hemisphere 0 \n", - "8759 North America Northern Hemisphere 0 \n", - "8760 South America Southern Hemisphere 1 \n", - "8761 South America Southern Hemisphere 0 \n", - "8762 Europe Northern Hemisphere 1 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 215, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "heart_data.tail()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 216, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ye8LlTQVeHs1", - "outputId": "e0fb1303-1000-45df-8cbf-49a3322f42b1" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(8763, 26)" - ] - }, - "execution_count": 216, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# number of rows and columns in the dataset\n", - "heart_data.shape\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 217, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5YZLMOwFeXF3", - "outputId": "4a322f74-c893-45a0-e4e2-cca32e4d0bd3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 8763 entries, 0 to 8762\n", - "Data columns (total 26 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Patient ID 8763 non-null object \n", - " 1 Age 8763 non-null int64 \n", - " 2 Sex 8763 non-null object \n", - " 3 Cholesterol 8763 non-null int64 \n", - " 4 Blood Pressure 8763 non-null object \n", - " 5 Heart Rate 8763 non-null int64 \n", - " 6 Diabetes 8763 non-null int64 \n", - " 7 Family History 8763 non-null int64 \n", - " 8 Smoking 8763 non-null int64 \n", - " 9 Obesity 8763 non-null int64 \n", - " 10 Alcohol Consumption 8763 non-null int64 \n", - " 11 Exercise Hours Per Week 8763 non-null float64\n", - " 12 Diet 8763 non-null object \n", - " 13 Previous Heart Problems 8763 non-null int64 \n", - " 14 Medication Use 8763 non-null int64 \n", - " 15 Stress Level 8763 non-null int64 \n", - " 16 Sedentary Hours Per Day 8763 non-null float64\n", - " 17 Income 8763 non-null int64 \n", - " 18 BMI 8763 non-null float64\n", - " 19 Triglycerides 8763 non-null int64 \n", - " 20 Physical Activity Days Per Week 8763 non-null int64 \n", - " 21 Sleep Hours Per Day 8763 non-null int64 \n", - " 22 Country 8763 non-null object \n", - " 23 Continent 8763 non-null object \n", - " 24 Hemisphere 8763 non-null object \n", - " 25 Heart Attack Risk 8763 non-null int64 \n", - "dtypes: float64(3), int64(16), object(7)\n", - "memory usage: 1.7+ MB\n" - ] - } - ], - "source": [ - "# getting some info about the data\n", - "heart_data.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 218, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QHazm2rze6Oj", - "outputId": "1601115a-a97a-4aa7-b0ca-1d94d09d11fa" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Patient ID 0\n", - "Age 0\n", - "Sex 0\n", - "Cholesterol 0\n", - "Blood Pressure 0\n", - "Heart Rate 0\n", - "Diabetes 0\n", - "Family History 0\n", - "Smoking 0\n", - "Obesity 0\n", - "Alcohol Consumption 0\n", - "Exercise Hours Per Week 0\n", - "Diet 0\n", - "Previous Heart Problems 0\n", - "Medication Use 0\n", - "Stress Level 0\n", - "Sedentary Hours Per Day 0\n", - "Income 0\n", - "BMI 0\n", - "Triglycerides 0\n", - "Physical Activity Days Per Week 0\n", - "Sleep Hours Per Day 0\n", - "Country 0\n", - "Continent 0\n", - "Hemisphere 0\n", - "Heart Attack Risk 0\n", - "dtype: int64" - ] - }, - "execution_count": 218, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#checking for missing values\n", - "heart_data.isnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 219, - "metadata": { - "id": "nt15bvuYfBcA" - }, - "outputs": [], - "source": [ - "# statistical measures about the data\n", - "z=heart_data.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 220, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NCbxYqqNf2-4", - "outputId": "1e38c06b-606b-4509-8e29-2d249daaf4d0" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Heart Attack Risk\n", - "0 5624\n", - "1 3139\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 220, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# checking the distribution of Target Variable\n", - "heart_data['Heart Attack Risk'].value_counts()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DvvKtsuILgK1" - }, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 221, - "metadata": { - "id": "mfiZ3MDvIiaV" - }, - "outputs": [], - "source": [ - "heart_data_num = heart_data.select_dtypes(include=[np.float32,np.float64,np.int64])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qWNMUL5_CrfC" - }, - "source": [ - "1-->Defective heart\n", - "\n", - "0-->Healthy heart\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 222, - "metadata": { - "id": "oSgKSF5-DGVk" - }, - "outputs": [], - "source": [ - "x=heart_data_num.drop(columns='Heart Attack Risk', axis=1)\n", - "y=heart_data_num['Heart Attack Risk']\n" - ] - }, - { - "cell_type": "code", - "execution_count": 223, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zhiIhyMxDhWF", - "outputId": "5684dc44-c814-4d81-e438-ecc544010d10" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Age Cholesterol Heart Rate Diabetes Family History Smoking \\\n", - "0 67 208 72 0 0 1 \n", - "1 21 389 98 1 1 1 \n", - "2 21 324 72 1 0 0 \n", - "3 84 383 73 1 1 1 \n", - "4 66 318 93 1 1 1 \n", - "... ... ... ... ... ... ... \n", - "8758 60 121 61 1 1 1 \n", - "8759 28 120 73 1 0 0 \n", - "8760 47 250 105 0 1 1 \n", - "8761 36 178 60 1 0 1 \n", - "8762 25 356 75 1 1 0 \n", - "\n", - " Obesity Alcohol Consumption Exercise Hours Per Week \\\n", - "0 0 0 4.168189 \n", - "1 1 1 1.813242 \n", - "2 0 0 2.078353 \n", - "3 0 1 9.828130 \n", - "4 1 0 5.804299 \n", - "... ... ... ... \n", - "8758 0 1 7.917342 \n", - "8759 1 0 16.558426 \n", - "8760 1 1 3.148438 \n", - "8761 0 0 3.789950 \n", - "8762 0 1 18.081748 \n", - "\n", - " Previous Heart Problems Medication Use Stress Level \\\n", - "0 0 0 9 \n", - "1 1 0 1 \n", - "2 1 1 9 \n", - "3 1 0 9 \n", - "4 1 0 6 \n", - "... ... ... ... \n", - "8758 1 1 8 \n", - "8759 0 0 8 \n", - "8760 1 0 5 \n", - "8761 1 1 5 \n", - "8762 0 0 8 \n", - "\n", - " Sedentary Hours Per Day Income BMI Triglycerides \\\n", - "0 6.615001 261404 31.251233 286 \n", - "1 4.963459 285768 27.194973 235 \n", - "2 9.463426 235282 28.176571 587 \n", - "3 7.648981 125640 36.464704 378 \n", - "4 1.514821 160555 21.809144 231 \n", - "... ... ... ... ... \n", - "8758 10.806373 235420 19.655895 67 \n", - "8759 3.833038 217881 23.993866 617 \n", - "8760 2.375214 36998 35.406146 527 \n", - "8761 0.029104 209943 27.294020 114 \n", - "8762 9.005234 247338 32.914151 180 \n", - "\n", - " Physical Activity Days Per Week Sleep Hours Per Day \n", - "0 0 6 \n", - "1 1 7 \n", - "2 4 4 \n", - "3 3 4 \n", - "4 1 5 \n", - "... ... ... \n", - "8758 7 7 \n", - "8759 4 9 \n", - "8760 4 4 \n", - "8761 2 8 \n", - "8762 7 4 \n", - "\n", - "[8763 rows x 18 columns]\n" - ] - } - ], - "source": [ - "print(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 224, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "VpOvdXWgHWmI", - "outputId": "8d0190fe-62af-4d69-9af1-d4ce89b52bc4" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 0\n", - "1 0\n", - "2 0\n", - "3 0\n", - "4 0\n", - " ..\n", - "8758 0\n", - "8759 0\n", - "8760 1\n", - "8761 0\n", - "8762 1\n", - "Name: Heart Attack Risk, Length: 8763, dtype: int64\n" - ] - } - ], - "source": [ - "print(y)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5A20XMHYII3T" - }, - "source": [ - "Splitting data into Training data" - ] - }, - { - "cell_type": "code", - "execution_count": 225, - "metadata": { - "id": "SNK4hm8DIPSm" - }, - "outputs": [], - "source": [ - "\n", - "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,stratify=y,random_state=3)" - ] - }, - { - "cell_type": "code", - "execution_count": 226, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GyZ6mljEHuVk", - "outputId": "c7c7bb9e-ed30-466a-ea57-88f43a409f0f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of samples in x: 8763\n", - "Number of samples in y: 8763\n" - ] - } - ], - "source": [ - "# Check the number of samples in x and y\n", - "print(f\"Number of samples in x: {len(x)}\")\n", - "print(f\"Number of samples in y: {len(y)}\")\n", - "\n", - "# If the number of samples is different, raise an error\n", - "if len(x) != len(y):\n", - " raise ValueError(\"Input arrays have different number of samples.\")\n", - "\n", - "# Proceed with train_test_split\n", - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=3)" - ] - }, - { - "cell_type": "code", - "execution_count": 227, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7OTKtdA-JLCV", - "outputId": "a9616ddf-7f61-4f3b-fbe0-6d81ea326287" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(8763, 18) (7010, 18) (1753, 18)\n" - ] - } - ], - "source": [ - "print(x.shape,x_train.shape,x_test.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ne2RibQaJdNe" - }, - "source": [ - "MODEL TRAINING" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AUEblGtLJlzD" - }, - "source": [ - "LOGISTIC REGRESSION" - ] - }, - { - "cell_type": "code", - "execution_count": 227, - "metadata": { - "id": "OVQDdrIpHm8P" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 228, - "metadata": { - "id": "w0CnNIPkHnTT" - }, - "outputs": [], - "source": [ - "model1=LogisticRegression()" - ] - }, - { - "cell_type": "code", - "execution_count": 229, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "kr84EwGwHqGY", - "outputId": "3a0e56ab-20f9-4584-8e40-c84af8b2c593" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - ], - "text/plain": [ - "LogisticRegression()" - ] - }, - "execution_count": 229, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Training the logistic regression model with training data\n", - "model1.fit(x_train,y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aPahD6MLKaPU" - }, - "source": [ - "Model Evaluation\n", - "\n", - "Accuracy Score" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FE92kQzZHIIl" - }, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 230, - "metadata": { - "id": "NHy61zdJKDR1" - }, - "outputs": [], - "source": [ - "#accuracy on training data\n", - "x_train_prediction = model1.predict(x_train)\n", - "training_data_accuracy = accuracy_score(x_train_prediction, y_train)" - ] - }, - { - "cell_type": "code", - "execution_count": 231, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "J4XiNRwXLCXf", - "outputId": "ffd6a7b0-f978-4e5f-e677-7dcf55ac39a1" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy on Training data: 0.6417974322396577\n" - ] - } - ], - "source": [ - "print('Accuracy on Training data:',training_data_accuracy)" - ] - }, - { - "cell_type": "code", - "execution_count": 232, - "metadata": { - "id": "ehbFgWjhLK44" - }, - "outputs": [], - "source": [ - "#accuracy on test data\n", - "x_test_prediction=model1.predict(x_test)\n", - "testing_data_accuracy=accuracy_score(x_test_prediction,y_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 233, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jYZIcbiVLs0G", - "outputId": "72bb19ec-73e3-437e-a6fb-e055fe2d31d0" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Accuracy on Testing data: 0.6417569880205363\n" - ] - } - ], - "source": [ - "print('Accuracy on Testing data:',testing_data_accuracy)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rec6Gz8vMP_G" - }, - "source": [ - "BUILDING PREDICTING SYSTEM" - ] - }, - { - "cell_type": "code", - "execution_count": 234, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ky2mzQUgL9IU", - "outputId": "72053f1d-55ac-4927-f8d8-1659265bbc5f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0]\n", - "The person does not have heart disease\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "input_data=(1,67,208,72,0,0,1,0,0,0,0,31.251233, 286,0,0,6,0,0)\n", - "# change the input data into numpy array\n", - "input_data_as_numpy_array=np.asarray(input_data)\n", - "#reshape the numpy array as we are predicting for only on instance\n", - "input_data_reshaped =input_data_as_numpy_array.reshape(1,-1)\n", - "prediction=model1.predict(input_data_reshaped)\n", - "print(prediction)\n", - "if (prediction[0]==0):\n", - " print(\"The person does not have heart disease\")\n", - "else:\n", - " print(\"the person has heart disease\")\n", - "\n" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}