diff --git a/Lung Cancer Detection/Lung-cancer-detection-Exploration.ipynb b/Lung Cancer Detection/Lung-cancer-detection-Exploration.ipynb
new file mode 100644
index 00000000..1220d229
--- /dev/null
+++ b/Lung Cancer Detection/Lung-cancer-detection-Exploration.ipynb
@@ -0,0 +1,655 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "d5540bc1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "01373691",
+ "metadata": {},
+ "source": [
+ "## Loading the Dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "79de3d42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df=pd.read_csv(\"Lung_Cancer_Dataset.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "851ed69b",
+ "metadata": {},
+ "source": [
+ "## Features and Label"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9d065c97",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GENDER | \n",
+ " AGE | \n",
+ " SMOKING | \n",
+ " YELLOW_FINGERS | \n",
+ " ANXIETY | \n",
+ " PEER_PRESSURE | \n",
+ " CHRONIC DISEASE | \n",
+ " FATIGUE | \n",
+ " ALLERGY | \n",
+ " WHEEZING | \n",
+ " ALCOHOL CONSUMING | \n",
+ " COUGHING | \n",
+ " SHORTNESS OF BREATH | \n",
+ " SWALLOWING DIFFICULTY | \n",
+ " CHEST PAIN | \n",
+ " LUNG_CANCER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " M | \n",
+ " 69 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " YES | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " M | \n",
+ " 74 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " YES | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " F | \n",
+ " 59 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " NO | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " M | \n",
+ " 63 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " NO | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " F | \n",
+ " 63 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " NO | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GENDER AGE SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE \\\n",
+ "0 M 69 1 2 2 1 \n",
+ "1 M 74 2 1 1 1 \n",
+ "2 F 59 1 1 1 2 \n",
+ "3 M 63 2 2 2 1 \n",
+ "4 F 63 1 2 1 1 \n",
+ "\n",
+ " CHRONIC DISEASE FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING COUGHING \\\n",
+ "0 1 2 1 2 2 2 \n",
+ "1 2 2 2 1 1 1 \n",
+ "2 1 2 1 2 1 2 \n",
+ "3 1 1 1 1 2 1 \n",
+ "4 1 1 1 2 1 2 \n",
+ "\n",
+ " SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN LUNG_CANCER \n",
+ "0 2 2 2 YES \n",
+ "1 2 2 2 YES \n",
+ "2 2 1 2 NO \n",
+ "3 1 2 2 NO \n",
+ "4 2 1 1 NO "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4a8a9caf",
+ "metadata": {},
+ "source": [
+ "#### Features :\n",
+ "GENDER, AGE, SMOKING, YELLOW_FINGERS, ANXIETY, PEER_PRESSURE, CHRONIC DISEASE, FATIGUE, ALLERGY, WHEEZING, ALCOHOL CONSUMING, COUGHING, SHORTNESS OF BREATH, SWALLOWING DIFFICULTY, CHEST PAIN\n",
+ "\n",
+ "#### Label :\n",
+ "LUNG_CANCER"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a564ab24",
+ "metadata": {},
+ "source": [
+ "## Shape of the dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "8b5d6d26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(309, 16)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f247e019",
+ "metadata": {},
+ "source": [
+ "### The dataset contains 309 rows and 16 columns (15 features and a label)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "94767c78",
+ "metadata": {},
+ "source": [
+ "## Data Type and Memory Usage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "53064761",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 309 entries, 0 to 308\n",
+ "Data columns (total 16 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 GENDER 309 non-null object\n",
+ " 1 AGE 309 non-null int64 \n",
+ " 2 SMOKING 309 non-null int64 \n",
+ " 3 YELLOW_FINGERS 309 non-null int64 \n",
+ " 4 ANXIETY 309 non-null int64 \n",
+ " 5 PEER_PRESSURE 309 non-null int64 \n",
+ " 6 CHRONIC DISEASE 309 non-null int64 \n",
+ " 7 FATIGUE 309 non-null int64 \n",
+ " 8 ALLERGY 309 non-null int64 \n",
+ " 9 WHEEZING 309 non-null int64 \n",
+ " 10 ALCOHOL CONSUMING 309 non-null int64 \n",
+ " 11 COUGHING 309 non-null int64 \n",
+ " 12 SHORTNESS OF BREATH 309 non-null int64 \n",
+ " 13 SWALLOWING DIFFICULTY 309 non-null int64 \n",
+ " 14 CHEST PAIN 309 non-null int64 \n",
+ " 15 LUNG_CANCER 309 non-null object\n",
+ "dtypes: int64(14), object(2)\n",
+ "memory usage: 38.8+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f598a82d",
+ "metadata": {},
+ "source": [
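+ "### One feature (GENDER) is of \"object\" data type; the remaining 14 features are of \"int64\" data type.\n",
+ "### The label (LUNG_CANCER) is of \"object\" data type.\n",
+ "### Memory usage: at least 38.8 KB (the \"+\" reported by `df.info()` indicates the actual usage may be higher, because object columns are not deeply inspected)."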
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b989c833",
+ "metadata": {},
+ "source": [
+ "## Statistical Analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "0616237a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AGE | \n",
+ " SMOKING | \n",
+ " YELLOW_FINGERS | \n",
+ " ANXIETY | \n",
+ " PEER_PRESSURE | \n",
+ " CHRONIC DISEASE | \n",
+ " FATIGUE | \n",
+ " ALLERGY | \n",
+ " WHEEZING | \n",
+ " ALCOHOL CONSUMING | \n",
+ " COUGHING | \n",
+ " SHORTNESS OF BREATH | \n",
+ " SWALLOWING DIFFICULTY | \n",
+ " CHEST PAIN | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ " 309.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 62.673139 | \n",
+ " 1.563107 | \n",
+ " 1.569579 | \n",
+ " 1.498382 | \n",
+ " 1.501618 | \n",
+ " 1.504854 | \n",
+ " 1.673139 | \n",
+ " 1.556634 | \n",
+ " 1.556634 | \n",
+ " 1.556634 | \n",
+ " 1.579288 | \n",
+ " 1.640777 | \n",
+ " 1.469256 | \n",
+ " 1.556634 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 8.210301 | \n",
+ " 0.496806 | \n",
+ " 0.495938 | \n",
+ " 0.500808 | \n",
+ " 0.500808 | \n",
+ " 0.500787 | \n",
+ " 0.469827 | \n",
+ " 0.497588 | \n",
+ " 0.497588 | \n",
+ " 0.497588 | \n",
+ " 0.494474 | \n",
+ " 0.480551 | \n",
+ " 0.499863 | \n",
+ " 0.497588 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 21.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 57.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 62.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 1.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 1.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 69.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 87.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AGE SMOKING YELLOW_FINGERS ANXIETY PEER_PRESSURE \\\n",
+ "count 309.000000 309.000000 309.000000 309.000000 309.000000 \n",
+ "mean 62.673139 1.563107 1.569579 1.498382 1.501618 \n",
+ "std 8.210301 0.496806 0.495938 0.500808 0.500808 \n",
+ "min 21.000000 1.000000 1.000000 1.000000 1.000000 \n",
+ "25% 57.000000 1.000000 1.000000 1.000000 1.000000 \n",
+ "50% 62.000000 2.000000 2.000000 1.000000 2.000000 \n",
+ "75% 69.000000 2.000000 2.000000 2.000000 2.000000 \n",
+ "max 87.000000 2.000000 2.000000 2.000000 2.000000 \n",
+ "\n",
+ " CHRONIC DISEASE FATIGUE ALLERGY WHEEZING ALCOHOL CONSUMING \\\n",
+ "count 309.000000 309.000000 309.000000 309.000000 309.000000 \n",
+ "mean 1.504854 1.673139 1.556634 1.556634 1.556634 \n",
+ "std 0.500787 0.469827 0.497588 0.497588 0.497588 \n",
+ "min 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
+ "25% 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
+ "50% 2.000000 2.000000 2.000000 2.000000 2.000000 \n",
+ "75% 2.000000 2.000000 2.000000 2.000000 2.000000 \n",
+ "max 2.000000 2.000000 2.000000 2.000000 2.000000 \n",
+ "\n",
+ " COUGHING SHORTNESS OF BREATH SWALLOWING DIFFICULTY CHEST PAIN \n",
+ "count 309.000000 309.000000 309.000000 309.000000 \n",
+ "mean 1.579288 1.640777 1.469256 1.556634 \n",
+ "std 0.494474 0.480551 0.499863 0.497588 \n",
+ "min 1.000000 1.000000 1.000000 1.000000 \n",
+ "25% 1.000000 1.000000 1.000000 1.000000 \n",
+ "50% 2.000000 2.000000 1.000000 2.000000 \n",
+ "75% 2.000000 2.000000 2.000000 2.000000 \n",
+ "max 2.000000 2.000000 2.000000 2.000000 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b4ee62a9",
+ "metadata": {},
+ "source": [
+ "### Summary of the AGE column (quartiles are taken over the ages sorted in ascending order): \n",
+ "\n",
+ "### The average age is about 62.7, the minimum age is 21 and the maximum age is 87. \n",
+ "### 25% of the patients are aged 57 or younger. \n",
+ "### 50% of the patients are aged 62 or younger. \n",
+ "### 75% of the patients are aged 69 or younger. \n",
+ "### The remaining features and the label are categorical, each having two categories."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a62df6e",
+ "metadata": {},
+ "source": [
+ "## Null values check"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "5d592446",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "GENDER 0\n",
+ "AGE 0\n",
+ "SMOKING 0\n",
+ "YELLOW_FINGERS 0\n",
+ "ANXIETY 0\n",
+ "PEER_PRESSURE 0\n",
+ "CHRONIC DISEASE 0\n",
+ "FATIGUE 0\n",
+ "ALLERGY 0\n",
+ "WHEEZING 0\n",
+ "ALCOHOL CONSUMING 0\n",
+ "COUGHING 0\n",
+ "SHORTNESS OF BREATH 0\n",
+ "SWALLOWING DIFFICULTY 0\n",
+ "CHEST PAIN 0\n",
+ "LUNG_CANCER 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44d3ac21",
+ "metadata": {},
+ "source": [
+ "### There are no null values in the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4995c1c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Lung-cancer-detection-Exploration.ipynb b/Lung-cancer-detection-Exploration.ipynb
new file mode 100644
index 00000000..e69de29b