-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathFine_Mapping_BD.Rmd
118 lines (99 loc) · 4.24 KB
/
Fine_Mapping_BD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
---
title: "<center><h1>Fine Mapping:</h1>Bipolar Disorder</h1></center>"
author:
"<div class='container'>
<h3>Brian M. Schilder, Bioinformatician II<br>
Raj Lab<br>
Department of Neuroscience<br>
Icahn School of Medicine at Mount Sinai<br>
NYC, New York<br>
</h3>"
date: "<br>Most Recent Update:<br> `r Sys.Date()`"
output:
html_document:
theme: cerulean
highlight: zenburn
code_folding: show
toc: true
toc_float: true
smooth_scroll: true
number_sections: false
self_contained: true
css: ./web/css/style.css
editor_options:
chunk_output_type: inline
---
```{r setup, message=F, warning=F, dpi = 600, class.output = "pre"}
source("echolocatoR/R/MAIN.R")
setwd("~/Desktop/Fine_Mapping/")
finemap_results <- list()
```
```{r multi-finemap, results = 'asis', fig.height=10, fig.width=7}
# fig.show='hold'
dataset_name <- "Daner_2020"
top_SNPs <- import_topSNPs(
# topSS_path = "Data/GWAS/Daner_2020/PGC3_BIP_5loci_finemap.xlsx",
Directory_info(dataset_name, "topSS"),
chrom_col = "CHR",
position_col = "BP",
snp_col="SNP",
pval_col="P",
effect_col="OR",
gene_col="Gene",
locus_col = "Locus")
completed <- list.files("Data/GWAS/Daner_2020/",
pattern = "Multi-finemap_results.txt", recursive = T)
loci <- top_SNPs$Locus[!top_SNPs$Locus %in% dirname(dirname(completed))]
finemap_results[[dataset_name]] <- finemap_loci(top_SNPs,
loci = loci,
dataset_name = dataset_name,
dataset_type = "GWAS",
query_by ="coordinates", # coordinates
subset_path = "auto",
finemap_methods = c("ABF","SUSIE","POLYFUN_SUSIE","FINEMAP"),
force_new_subset = F,
force_new_LD = T,
force_new_finemap = T,
fullSS_path = Directory_info(dataset_name, "fullSS.local"),
chrom_col = "CHR", position_col = "BP", snp_col = "SNP",
pval_col = "P", effect_col = "OR", stderr_col = "SE",
freq_col = "FRQ_A_41917", MAF_col = "calculate",
N_cases_col = "Nca", N_controls_col = "Nco",
proportion_cases = "calculate",
A1_col = "A1",
A2_col = "A2",
bp_distance = 500000*2,
plot_window = 500000,
# max_snps=1000,
download_reference = T,
LD_reference = "UKB",
superpopulation = "EUR",
min_MAF = 0.001,
conditioned_snps = "auto", # Use the lead SNP for each locus
n_causal = 5,
plot_types = "simple",
PP_threshold = .95,
remove_tmps = T )
```
```{r}
merged_DT <- merge_finemapping_results(minimum_support=0,
include_leadSNPs=T,
dataset = "./Data/GWAS/Daner_2020/",
xlsx_path="./Data/Daner_2020_results.xlsx",
from_storage=T,
consensus_thresh = 2,
haploreg_annotation=F,
biomart_annotation=F,
verbose = F)
merged_stats <- merged_DT %>% dplyr::group_by(Gene) %>%
dplyr::summarise(count.consensus = n_distinct(SNP[Consensus_SNP==T]),
meanPP.consensus = mean(mean.PP[Consensus_SNP==T]),
meanPP.leadGWAS = mean(mean.PP[leadSNP==T]),
leadGWAS.x.consensus = n_distinct(SNP[Consensus_SNP==T & leadSNP==T & mean.PP==max(mean.PP)]))
(subset(merged_DT, P<.05) %>% dplyr::group_by(Gene) %>% count())$n %>% mean()
median(merged_stats$Consensus_count)
max(merged_stats$Consensus_count)
mean(merged_stats$meanPP.consensus)
mean(merged_stats$meanPP.leadGWAS)
mean(merged_stats$leadGWAS.x.consensus)
```