diff --git a/Endometrial Cancer Prediction/Endometrial Cancer Prediction Dataset.ipynb b/Endometrial Cancer Prediction/Endometrial Cancer Prediction Dataset.ipynb new file mode 100644 index 00000000..fc075d71 --- /dev/null +++ b/Endometrial Cancer Prediction/Endometrial Cancer Prediction Dataset.ipynb @@ -0,0 +1,1615 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8ea6b3f8", + "metadata": {}, + "source": [ + "# Endometrial Cancer Prediction Dataset\n", + "\n", + "This dataset contains information about endometrial cancer, also known as endometrial carcinoma, which is a type of cancer that starts in the cells of the inner lining of the uterus (the endometrium). Endometrial carcinomas can be categorized into different types based on cellular characteristics observed under a microscope." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b9b66e04", + "metadata": {}, + "outputs": [], + "source": [ + "#importing libraries\n", + "\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "82c0c766", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Patient ID | \n", + "Sample ID | \n", + "Cancer Type Detailed | \n", + "Overall Survival Status | \n", + "Disease Free Status | \n", + "Disease-specific Survival status | \n", + "Mutation Count | \n", + "Fraction Genome Altered | \n", + "Diagnosis Age | \n", + "MSI MANTIS Score | \n", + "MSIsensor Score | \n", + "Race Category | \n", + "Subtype | \n", + "Tumor Type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "TCGA-2E-A9G8 | \n", + "TCGA-2E-A9G8-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "65.0 | \n", + "0.3311 | \n", + "59.0 | \n", + "0.3234 | \n", + "0.85 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
1 | \n", + "TCGA-4E-A92E | \n", + "TCGA-4E-A92E-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "147.0 | \n", + "0.0341 | \n", + "54.0 | \n", + "0.3396 | \n", + "0.01 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
2 | \n", + "TCGA-5B-A90C | \n", + "TCGA-5B-A90C-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "45.0 | \n", + "0.6903 | \n", + "69.0 | \n", + "0.3344 | \n", + "0.55 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
3 | \n", + "TCGA-5S-A9Q8 | \n", + "TCGA-5S-A9Q8-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "50.0 | \n", + "0.0581 | \n", + "51.0 | \n", + "0.3199 | \n", + "0.09 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
4 | \n", + "TCGA-A5-A0G1 | \n", + "TCGA-A5-A0G1-01 | \n", + "Uterine Serous Carcinoma/Uterine Papillary Ser... | \n", + "1:DECEASED | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "10808.0 | \n", + "0.0001 | \n", + "67.0 | \n", + "0.3108 | \n", + "1.74 | \n", + "White | \n", + "UCEC_POLE | \n", + "Serous Endometrial Adenocarcinoma | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
524 | \n", + "TCGA-QS-A8F1 | \n", + "TCGA-QS-A8F1-01 | \n", + "Uterine Serous Carcinoma/Uterine Papillary Ser... | \n", + "1:DECEASED | \n", + "NaN | \n", + "1:DEAD WITH TUMOR | \n", + "63.0 | \n", + "0.6549 | \n", + "85.0 | \n", + "0.3647 | \n", + "0.15 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Serous Endometrial Adenocarcinoma | \n", + "
525 | \n", + "TCGA-SJ-A6ZI | \n", + "TCGA-SJ-A6ZI-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "1306.0 | \n", + "0.0275 | \n", + "64.0 | \n", + "0.6138 | \n", + "14.35 | \n", + "Black or African American | \n", + "UCEC_MSI | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
526 | \n", + "TCGA-SJ-A6ZJ | \n", + "TCGA-SJ-A6ZJ-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "56.0 | \n", + "0.0466 | \n", + "61.0 | \n", + "0.3382 | \n", + "0.00 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
527 | \n", + "TCGA-SL-A6J9 | \n", + "TCGA-SL-A6J9-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "48.0 | \n", + "0.4226 | \n", + "73.0 | \n", + "0.3503 | \n", + "0.03 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
528 | \n", + "TCGA-SL-A6JA | \n", + "TCGA-SL-A6JA-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "742.0 | \n", + "0.0488 | \n", + "77.0 | \n", + "0.9515 | \n", + "26.10 | \n", + "Black or African American | \n", + "UCEC_MSI | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
529 rows × 14 columns
\n", + "\n", + " | Patient ID | \n", + "Sample ID | \n", + "Cancer Type Detailed | \n", + "Overall Survival Status | \n", + "Disease Free Status | \n", + "Disease-specific Survival status | \n", + "Mutation Count | \n", + "Fraction Genome Altered | \n", + "Diagnosis Age | \n", + "MSI MANTIS Score | \n", + "MSIsensor Score | \n", + "Race Category | \n", + "Subtype | \n", + "Tumor Type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "TCGA-2E-A9G8 | \n", + "TCGA-2E-A9G8-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "65.0 | \n", + "0.3311 | \n", + "59.0 | \n", + "0.3234 | \n", + "0.85 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
1 | \n", + "TCGA-4E-A92E | \n", + "TCGA-4E-A92E-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "147.0 | \n", + "0.0341 | \n", + "54.0 | \n", + "0.3396 | \n", + "0.01 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
2 | \n", + "TCGA-5B-A90C | \n", + "TCGA-5B-A90C-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "45.0 | \n", + "0.6903 | \n", + "69.0 | \n", + "0.3344 | \n", + "0.55 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
3 | \n", + "TCGA-5S-A9Q8 | \n", + "TCGA-5S-A9Q8-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "50.0 | \n", + "0.0581 | \n", + "51.0 | \n", + "0.3199 | \n", + "0.09 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
4 | \n", + "TCGA-A5-A0G1 | \n", + "TCGA-A5-A0G1-01 | \n", + "Uterine Serous Carcinoma/Uterine Papillary Ser... | \n", + "1:DECEASED | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "10808.0 | \n", + "0.0001 | \n", + "67.0 | \n", + "0.3108 | \n", + "1.74 | \n", + "White | \n", + "UCEC_POLE | \n", + "Serous Endometrial Adenocarcinoma | \n", + "
\n", + " | Patient ID | \n", + "Sample ID | \n", + "Cancer Type Detailed | \n", + "Overall Survival Status | \n", + "Disease Free Status | \n", + "Disease-specific Survival status | \n", + "Mutation Count | \n", + "Fraction Genome Altered | \n", + "Diagnosis Age | \n", + "MSI MANTIS Score | \n", + "MSIsensor Score | \n", + "Race Category | \n", + "Subtype | \n", + "Tumor Type | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
524 | \n", + "TCGA-QS-A8F1 | \n", + "TCGA-QS-A8F1-01 | \n", + "Uterine Serous Carcinoma/Uterine Papillary Ser... | \n", + "1:DECEASED | \n", + "NaN | \n", + "1:DEAD WITH TUMOR | \n", + "63.0 | \n", + "0.6549 | \n", + "85.0 | \n", + "0.3647 | \n", + "0.15 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Serous Endometrial Adenocarcinoma | \n", + "
525 | \n", + "TCGA-SJ-A6ZI | \n", + "TCGA-SJ-A6ZI-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "1306.0 | \n", + "0.0275 | \n", + "64.0 | \n", + "0.6138 | \n", + "14.35 | \n", + "Black or African American | \n", + "UCEC_MSI | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
526 | \n", + "TCGA-SJ-A6ZJ | \n", + "TCGA-SJ-A6ZJ-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "56.0 | \n", + "0.0466 | \n", + "61.0 | \n", + "0.3382 | \n", + "0.00 | \n", + "Black or African American | \n", + "UCEC_CN_LOW | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
527 | \n", + "TCGA-SL-A6J9 | \n", + "TCGA-SL-A6J9-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "NaN | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "48.0 | \n", + "0.4226 | \n", + "73.0 | \n", + "0.3503 | \n", + "0.03 | \n", + "Black or African American | \n", + "UCEC_CN_HIGH | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
528 | \n", + "TCGA-SL-A6JA | \n", + "TCGA-SL-A6JA-01 | \n", + "Uterine Endometrioid Carcinoma | \n", + "0:LIVING | \n", + "0:DiseaseFree | \n", + "0:ALIVE OR DEAD TUMOR FREE | \n", + "742.0 | \n", + "0.0488 | \n", + "77.0 | \n", + "0.9515 | \n", + "26.10 | \n", + "Black or African American | \n", + "UCEC_MSI | \n", + "Endometrioid Endometrial Adenocarcinoma | \n", + "
\n", + " | count | \n", + "mean | \n", + "std | \n", + "min | \n", + "25% | \n", + "50% | \n", + "75% | \n", + "max | \n", + "
---|---|---|---|---|---|---|---|---|
Mutation Count | \n", + "515.0 | \n", + "1046.423301 | \n", + "2734.365494 | \n", + "7.000 | \n", + "47.00000 | \n", + "76.0000 | \n", + "563.500000 | \n", + "25696.0000 | \n", + "
Fraction Genome Altered | \n", + "519.0 | \n", + "0.185959 | \n", + "0.226088 | \n", + "0.000 | \n", + "0.00360 | \n", + "0.0678 | \n", + "0.314450 | \n", + "0.9487 | \n", + "
Diagnosis Age | \n", + "526.0 | \n", + "63.766160 | \n", + "11.060030 | \n", + "31.000 | \n", + "57.00000 | \n", + "64.0000 | \n", + "71.000000 | \n", + "90.0000 | \n", + "
MSI MANTIS Score | \n", + "526.0 | \n", + "0.438948 | \n", + "0.239666 | \n", + "0.237 | \n", + "0.28395 | \n", + "0.3113 | \n", + "0.543225 | \n", + "1.3049 | \n", + "
MSIsensor Score | \n", + "528.0 | \n", + "6.395890 | \n", + "10.175853 | \n", + "0.000 | \n", + "0.11000 | \n", + "0.4350 | \n", + "10.117500 | \n", + "40.4300 | \n", + "