03_comorbidity-preprocess.Rmd

---
title: "Comorbidity Pre-processing"
author: "Meg Hutch"
date: "2022-11-16"
output: html_document
---

Pre-process comorbidity data for use in downstream analysis.

```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(DT)
```

# **Import Data from each Healthcare system**

```{r message=FALSE, warning=FALSE}
# Load every site-level results file from the results folder.
# This folder contains all of the local healthcare system level analyses;
# each file is load()ed so that its objects land in the current environment.
rdas <- list.files(
  path = "results_comorbidity",
  # `pattern` is a regular expression: escape the dot and anchor the suffix so
  # that only file names ending in ".rda" are picked up
  pattern = "\\.rda$",
  full.names = TRUE
)
for (rda in rdas) {
  load(rda)
}

# remove the loop helpers so they do not linger next to the loaded results
rm(rdas, rda)

# Study tracking spreadsheet listing the participating healthcare systems
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# sheet 1: site parameters; sheet 2: which analyses each site has run
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# keep the sites whose comorbidity analysis is marked "completed" and build
# the name of each site's results object: "<siteid>_comorb_results"
completed_site_ids <- site_avails %>%
  filter(comorb_results == "completed") %>%
  pull(siteid)
sorted_sites <- paste(completed_site_ids, "comorb_results", sep = "_")

# gather every object in the workspace with 'results' in its name
results_comorbidities <- mget(ls(pattern = "results"))

# for each completed site, pull the table-one comorbidity results out of the
# site's results object and store them under the site's object name
comorb_results <- list()

for (i in sorted_sites) {
  tmp <- get(i)
  comorb_results[[i]] <- tmp[[c(
    "first_hosp_results",
    "tableone_comorbidity_results"
  )]]
}
```

### **Calculate total patient counts by neurological status**

These counts will be used for Table 1 and for determining the denominator for comorbidity prevalence.

```{r}
# Per-site patient count table generated earlier in the workflow
pt_counts <- read.csv("tables/site_pt_counts.csv")

# Keep the rows of one population and attach, on every row, that population's
# total patient count for each neurological status (summed over all rows).
add_neuro_totals <- function(counts, pop) {
  counts %>%
    # `.env$pop` makes sure the function argument is used even if the data
    # happens to contain a column called `pop`
    filter(population == .env$pop) %>%
    mutate(
      None_n = sum(n_var_None, na.rm = TRUE),
      PNS_n = sum(n_var_Peripheral, na.rm = TRUE),
      CNS_n = sum(n_var_Central, na.rm = TRUE)
    )
}

adult_count <- add_neuro_totals(pt_counts, "Adult")
ped_count <- add_neuro_totals(pt_counts, "Pediatric")

# combine adult and pediatric counts: the totals are repeated on every row, so
# distinct() leaves one row of totals per population
neuro_total_cols <- c("population", "None_n", "CNS_n", "PNS_n")
neuro_pt_counts <- rbind(
  distinct(select(adult_count, all_of(neuro_total_cols))),
  distinct(select(ped_count, all_of(neuro_total_cols)))
)

```

### **Abstract and process comorbidity data across healthcare systems**

```{r}
# Extract and clean one site's comorbidity table for one population.
#
# site_key:   name of the site's loaded results object
#             (e.g. "BCH_comorb_results")
# table_name: name of the population table inside the site's table-one
#             comorbidity results ("comorbidity_neuro_adults" or
#             "comorbidity_neuro_pediatrics")
#
# Returns the table with the site label attached, the comorbidity categories
# harmonised, and only the site, comorbidity and count columns kept.
extract_site_comorbidities <- function(site_key, table_name) {
  site_results <- get(site_key)
  comorb_tbl <- site_results[[c(
    "first_hosp_results",
    "tableone_comorbidity_results",
    table_name
  )]]
  # fail with a readable message instead of a dplyr dispatch error when a site
  # has no table for this population
  if (is.null(comorb_tbl)) {
    stop(
      "'", site_key, "' has no '", table_name, "' table",
      call. = FALSE
    )
  }
  comorb_tbl$site <- site_results[["site"]]

  comorb_tbl %>%
    # because Elixhauser comorbidity index has two diabetes categories which
    # capture the same diagnoses once we employ our truncation method, we
    # remove one of the diabetes categories and relabel the other
    filter(Comorbidity != "Diabetes, uncomplicated") %>%
    mutate(
      Comorbidity = if_else(
        Comorbidity == "Diabetes, complicated", "Diabetes", Comorbidity
      ),
      # rename hypertension
      Comorbidity = if_else(
        Comorbidity == "Hypertension, combined", "Hypertension", Comorbidity
      ),
      # rename neurological disorders
      Comorbidity = if_else(
        Comorbidity == "Other neurological disorders",
        "Neurological disorders",
        Comorbidity
      )
    ) %>%
    select(site, Comorbidity, n_Total, n_None, n_Central, n_Peripheral)
}

# create empty lists to store comorbidities, keyed by site object name
comorb_table_list_adult <- list()
comorb_table_list_pediatric <- list()

## adults
# BCH and GOSH are left out of the adult tables
# (presumably pediatric-only sites -- confirm against the site list)
adult_sites <- setdiff(
  sorted_sites,
  c("BCH_comorb_results", "GOSH_comorb_results")
)
for (i in adult_sites) {
  comorb_table_list_adult[[i]] <-
    extract_site_comorbidities(i, "comorbidity_neuro_adults")
}

## pediatrics
# the VA sites are left out of the pediatric tables
# (presumably adult-only sites -- confirm against the site list)
pediatric_sites <- setdiff(
  sorted_sites,
  c(
    "VA1_comorb_results", "VA2_comorb_results", "VA3_comorb_results",
    "VA4_comorb_results", "VA5_comorb_results"
  )
)
for (i in pediatric_sites) {
  comorb_table_list_pediatric[[i]] <-
    extract_site_comorbidities(i, "comorbidity_neuro_pediatrics")
}

# stack the per-site tables into one table per population and label each
comorb_table_adult <- mutate(
  bind_rows(comorb_table_list_adult),
  population = "Adult"
)

comorb_table_pediatric <- mutate(
  bind_rows(comorb_table_list_pediatric),
  population = "Pediatric"
)

# combine both populations and correct the misspelled electrolyte category
comorb_table_all <- rbind(comorb_table_adult, comorb_table_pediatric) %>%
  mutate(
    Comorbidity = if_else(
      Comorbidity == "Fluid and electrolye disorders",
      "Fluid and electrolyte disorders",
      Comorbidity
    )
  )

# percentage of `count` out of `denominator`, rounded to one decimal place
pct_of <- function(count, denominator) {
  round(count / denominator * 100, 1)
}

# display label combining a count with its percentage
n_pct_label <- function(count, pct) {
  paste(count, "(", pct, "%)")
}

# join neuro patient counts to add the denominator of each neurological
# status for the row's population
comorb_with_denominators <- left_join(
  data.frame(comorb_table_all),
  neuro_pt_counts,
  by = "population"
)

# calculate total number of patients with each comorbidity across neurological
# status: counts are summed over sites within each comorbidity/population pair
# and expressed as a percentage of the population denominators
comorb_table_wide <- comorb_with_denominators %>%
  group_by(Comorbidity, population) %>%
  mutate(
    Comorb_Total = sum(n_Total, na.rm = TRUE),
    Comorb_Perc = pct_of(Comorb_Total, None_n + CNS_n + PNS_n),
    None_Total = sum(n_None, na.rm = TRUE),
    None_Perc = pct_of(None_Total, None_n),
    CNS_Total = sum(n_Central, na.rm = TRUE),
    CNS_Perc = pct_of(CNS_Total, CNS_n),
    PNS_Total = sum(n_Peripheral, na.rm = TRUE),
    PNS_Perc = pct_of(PNS_Total, PNS_n),
    `Comorb_N_%` = n_pct_label(Comorb_Total, Comorb_Perc),
    `NNC_N_%` = n_pct_label(None_Total, None_Perc),
    `CNS_N_%` = n_pct_label(CNS_Total, CNS_Perc),
    `PNS_N_%` = n_pct_label(PNS_Total, PNS_Perc)
  ) %>%
  ungroup() %>%
  # the totals are repeated on every site row; keep one row per
  # comorbidity/population pair
  distinct(
    Comorbidity, population,
    Comorb_Total, None_Total, CNS_Total, PNS_Total,
    Comorb_Perc, None_Perc, CNS_Perc, PNS_Perc,
    `Comorb_N_%`, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`
  )

# display version: overall total plus the "N ( % )" labels only
comorb_table_plot <- distinct(
  comorb_table_wide,
  Comorbidity, population, Comorb_Total,
  `Comorb_N_%`, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`
)

# interactive table, most frequent comorbidities first
comorb_table_sorted <- arrange(comorb_table_plot, desc(Comorb_Total))
datatable(
  comorb_table_sorted,
  caption = "Number of Patients with each Pre-existing Comorbidity by Neurological Status"
)
```

**Save processed comorbidity data**

```{r}
# make sure the output folder exists so the writes below do not fail just
# because the folder is missing (no-op when it is already there)
dir.create("processed", showWarnings = FALSE, recursive = TRUE)

# site-level comorbidity counts for both populations
write.csv(comorb_table_all, "processed/comorbidity_table_by_site.csv", row.names = FALSE)
# pooled counts and percentages by neurological status, as CSV and as .rda
write.csv(comorb_table_wide, "processed/comorbidity_table_cnps.csv", row.names = FALSE)
save(comorb_table_wide, file = "processed/comorbidity_table_cnps.rda")
# population-level patient totals used as denominators
save(neuro_pt_counts, file = "processed/neuro_pt_counts.rda")
```