Fine_Mapping_BD.Rmd

 ---
title: "<center><h1>Fine Mapping:</h1>Bipolar Disorder</h1></center>" 
author: 
    "<div class='container'>
     <h3>Brian M. Schilder, Bioinformatician II<br>
     Raj Lab<br>
     Department of Neuroscience<br>
     Icahn School of Medicine at Mount Sinai<br>
     NYC, New York<br>
     </h3>"
date: "<br>Most Recent Update:<br> `r Sys.Date()`"
output:
 html_document:
    theme: cerulean
    highlight: zenburn
    code_folding: show
    toc: true
    toc_float: true
    smooth_scroll: true
    number_sections: false
    self_contained: true
    css: ./web/css/style.css
editor_options: 
  chunk_output_type: inline
--- 


```{r setup, message=F, warning=F, dpi = 600, class.output = "pre"} 
source("echolocatoR/R/MAIN.R") 
setwd("~/Desktop/Fine_Mapping/")
finemap_results <- list() 
```


```{r multi-finemap, results = 'asis', fig.height=10, fig.width=7}
# fig.show='hold'
dataset_name <- "Daner_2020"
top_SNPs <- import_topSNPs(
  # topSS_path = "Data/GWAS/Daner_2020/PGC3_BIP_5loci_finemap.xlsx",
  Directory_info(dataset_name, "topSS"),
  chrom_col = "CHR", 
  position_col = "BP", 
  snp_col="SNP",
  pval_col="P", 
  effect_col="OR", 
  gene_col="Gene",  
  locus_col = "Locus")   

completed <- list.files("Data/GWAS/Daner_2020/", 
                        pattern = "Multi-finemap_results.txt", recursive = T)
loci <- top_SNPs$Locus[!top_SNPs$Locus %in% dirname(dirname(completed))]

 
finemap_results[[dataset_name]] <- finemap_loci(top_SNPs,  
                                               loci = loci,
                                               dataset_name = dataset_name,
                                               dataset_type = "GWAS",
                                               query_by ="coordinates", # coordinates
                                               subset_path = "auto", 
                                               finemap_methods = c("ABF","SUSIE","POLYFUN_SUSIE","FINEMAP"),  
                                               force_new_subset = F,
                                               force_new_LD = T,
                                               force_new_finemap = T, 
                 fullSS_path = Directory_info(dataset_name, "fullSS.local"),
                 chrom_col = "CHR", position_col = "BP", snp_col = "SNP",
                 pval_col = "P", effect_col = "OR", stderr_col = "SE", 
                 freq_col = "FRQ_A_41917", MAF_col = "calculate",
                 N_cases_col = "Nca", N_controls_col = "Nco", 
                 proportion_cases = "calculate",
                 A1_col = "A1",
                 A2_col = "A2",
                 bp_distance = 500000*2,
                 plot_window = 500000,
                 # max_snps=1000,
                 download_reference = T, 
                 LD_reference = "UKB", 
                 superpopulation = "EUR",  
                 min_MAF = 0.001, 
                 conditioned_snps = "auto", # Use the lead SNP for each locus  
                 n_causal = 5, 
                 plot_types = "simple",  
                 PP_threshold = .95,
                 remove_tmps = T )  
```


```{r}
merged_DT <- merge_finemapping_results(minimum_support=0,
                                      include_leadSNPs=T,
                                      dataset = "./Data/GWAS/Daner_2020/",
                                      xlsx_path="./Data/Daner_2020_results.xlsx",
                                      from_storage=T,
                                      consensus_thresh = 2,
                                      haploreg_annotation=F,
                                      biomart_annotation=F, 
                                      verbose = F)

merged_stats <- merged_DT %>% dplyr::group_by(Gene) %>% 
  dplyr::summarise(count.consensus = n_distinct(SNP[Consensus_SNP==T]),
                   meanPP.consensus = mean(mean.PP[Consensus_SNP==T]),
                   meanPP.leadGWAS = mean(mean.PP[leadSNP==T]),
                   leadGWAS.x.consensus = n_distinct(SNP[Consensus_SNP==T & leadSNP==T & mean.PP==max(mean.PP)]))

(subset(merged_DT, P<.05) %>% dplyr::group_by(Gene) %>% count())$n %>% mean()

median(merged_stats$Consensus_count)  
max(merged_stats$Consensus_count) 
mean(merged_stats$meanPP.consensus)
mean(merged_stats$meanPP.leadGWAS) 
mean(merged_stats$leadGWAS.x.consensus)

```